2019-11-04 04:40:47 +11:00
|
|
|
// Code generated from gen/RISCV64.rules; DO NOT EDIT.
|
|
|
|
|
// generated with: cd gen; go run *.go
|
|
|
|
|
|
|
|
|
|
package ssa
|
|
|
|
|
|
|
|
|
|
import "math"
|
|
|
|
|
import "cmd/compile/internal/types"
|
|
|
|
|
|
|
|
|
|
func rewriteValueRISCV64(v *Value) bool {
|
|
|
|
|
switch v.Op {
|
|
|
|
|
case OpAdd16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FADDS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FADDD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAddPtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAddr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVaddr
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAndB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAvg64u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpAvg64u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpClosureCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLclosure
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom16:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom32:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom64:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom8:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVHconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVWconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst32F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConst32F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVDconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst64F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConst64F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVBconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConstBool:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVBconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConstNil:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConstNil(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConvert:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVconvert
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTWS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTLS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32to32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32to64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTWD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTLD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64to32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSL
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64to64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDL
|
|
|
|
|
return true
|
2020-02-28 17:04:16 -08:00
|
|
|
case OpCvtBoolToUint8:
|
|
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv16u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv16u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32:
|
2020-02-24 13:53:53 -08:00
|
|
|
v.Op = OpRISCV64DIVW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FDIVS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64DIVUW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64:
|
2020-02-24 13:53:53 -08:00
|
|
|
v.Op = OpRISCV64DIV
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FDIVD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64DIVU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv8u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv8u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FEQS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FEQD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEqB:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEqB(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEqPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEqPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGeq32F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpGeq32F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGeq64F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpGeq64F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetCallerPC:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetCallerPC
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetCallerSP:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetCallerSP
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetClosurePtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetClosurePtr
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGreater32F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpGreater32F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGreater64F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpGreater64F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpHmul32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul32u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpHmul32u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULH
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULHU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpInterCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLinter
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsInBounds:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpLess64U
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsNonNil:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpIsNonNil(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsSliceInBounds:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpLeq64U
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq16U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq16U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLES
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq32U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLED
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq64U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq8U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq8U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess16U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess16U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLTS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess32U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SLT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLTD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64U:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SLTU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess8U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess8U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLoad:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLoad(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLocalAddr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLocalAddr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod16u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod16u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod32:
|
2020-02-24 13:53:53 -08:00
|
|
|
v.Op = OpRISCV64REMW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod32u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64REMUW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod64:
|
2020-02-24 13:53:53 -08:00
|
|
|
v.Op = OpRISCV64REM
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64REMU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod8u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod8u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMove:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMove(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMul16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FMULS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MUL
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FMULD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMul8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg16:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg32:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNEGS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg64:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNEGD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg8:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNES
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNED
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeqB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeqPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeqPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNilCheck:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredNilCheck
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNot:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNot(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOffPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpOffPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOrB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpPanicBounds:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpPanicBounds(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64ADD:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64ADD(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64ADDI:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64ADDI(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBUload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVBstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDconst:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDconst(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVDstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHUload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVHstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWUload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWload(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVWstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWstorezero(v)
|
2020-03-02 04:23:12 +11:00
|
|
|
case OpRISCV64SUB:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SUB(v)
|
2020-03-02 04:24:35 +11:00
|
|
|
case OpRISCV64SUBW:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SUBW(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRound32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRound64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt16to32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt16to32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt16to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt16to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt32to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt32to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt8to16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt8to32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSignExt8to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSlicemask:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSlicemask(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSqrt:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSQRTD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpStaticCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLstatic
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpStore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpStore(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSUBS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSUBD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSubPtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc16to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc32to16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc32to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpWB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredWB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZero:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt16to32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt16to32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt16to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt16to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt32to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt32to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt8to16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt8to32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZeroExt8to64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpAvg64u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Avg64u <t> x y)
|
|
|
|
|
// result: (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
|
|
|
|
|
v1.AuxInt = 1
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
|
|
|
|
|
v2.AuxInt = 1
|
|
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64ANDI, t)
|
|
|
|
|
v3.AuxInt = 1
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpRISCV64AND, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v4.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(v4)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v3)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConst32F(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Const32F [val])
|
|
|
|
|
// result: (FMVSX (MOVWconst [int64(int32(math.Float32bits(float32(math.Float64frombits(uint64(val))))))]))
|
|
|
|
|
for {
|
|
|
|
|
val := v.AuxInt
|
|
|
|
|
v.reset(OpRISCV64FMVSX)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
|
|
|
|
v0.AuxInt = int64(int32(math.Float32bits(float32(math.Float64frombits(uint64(val))))))
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConst64F(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Const64F [val])
|
|
|
|
|
// result: (FMVDX (MOVDconst [val]))
|
|
|
|
|
for {
|
|
|
|
|
val := v.AuxInt
|
|
|
|
|
v.reset(OpRISCV64FMVDX)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v0.AuxInt = val
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConstNil(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (ConstNil)
|
|
|
|
|
// result: (MOVDconst [0])
|
|
|
|
|
for {
|
|
|
|
|
v.reset(OpRISCV64MOVDconst)
|
|
|
|
|
v.AuxInt = 0
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div16 x y)
|
|
|
|
|
// result: (DIVW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv16u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div16u x y)
|
|
|
|
|
// result: (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div8 x y)
|
|
|
|
|
// result: (DIVW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv8u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div8u x y)
|
|
|
|
|
// result: (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Eq16 x y)
|
|
|
|
|
// result: (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Eq32 x y)
|
2020-02-26 03:58:59 +11:00
|
|
|
// result: (SEQZ (SUBW <x.Type> x y))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
2020-02-26 03:58:59 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Eq64 x y)
|
|
|
|
|
// result: (SEQZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Eq8 x y)
|
|
|
|
|
// result: (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEqB(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (EqB x y)
|
|
|
|
|
// result: (XORI [1] (XOR <typ.Bool> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64XORI)
|
|
|
|
|
v.AuxInt = 1
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64XOR, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEqPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (EqPtr x y)
|
|
|
|
|
// result: (SEQZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpGeq32F(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Geq32F x y)
|
|
|
|
|
// result: (FLES y x)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FLES)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpGeq64F(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Geq64F x y)
|
|
|
|
|
// result: (FLED y x)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FLED)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpGreater32F(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Greater32F x y)
|
|
|
|
|
// result: (FLTS y x)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FLTS)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpGreater64F(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Greater64F x y)
|
|
|
|
|
// result: (FLTD y x)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FLTD)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpHmul32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Hmul32 x y)
|
|
|
|
|
// result: (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 32
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpHmul32u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Hmul32u x y)
|
|
|
|
|
// result: (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2020-01-23 14:28:04 -08:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 32
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2020-01-23 14:28:04 -08:00
|
|
|
v.AddArg(v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpIsNonNil(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (IsNonNil p)
|
|
|
|
|
// result: (NeqPtr (MOVDconst) p)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
p := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNeqPtr)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, p)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq16 x y)
|
|
|
|
|
// result: (Not (Less16 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess16, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq16U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq16U x y)
|
|
|
|
|
// result: (Not (Less16U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess16U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq32 x y)
|
|
|
|
|
// result: (Not (Less32 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess32, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq32U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq32U x y)
|
|
|
|
|
// result: (Not (Less32U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess32U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq64 x y)
|
|
|
|
|
// result: (Not (Less64 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess64, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq64U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq64U x y)
|
|
|
|
|
// result: (Not (Less64U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess64U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq8 x y)
|
|
|
|
|
// result: (Not (Less8 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess8, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq8U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq8U x y)
|
|
|
|
|
// result: (Not (Less8U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess8U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less16 x y)
|
|
|
|
|
// result: (SLT (SignExt16to64 x) (SignExt16to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess16U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less16U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less32 x y)
|
|
|
|
|
// result: (SLT (SignExt32to64 x) (SignExt32to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess32U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less32U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less8 x y)
|
|
|
|
|
// result: (SLT (SignExt8to64 x) (SignExt8to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess8U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less8U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLoad(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: t.IsBoolean()
|
|
|
|
|
// result: (MOVBUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.IsBoolean()) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: ( is8BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVBload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is8BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: ( is8BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVBUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is8BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is16BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVHload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is16BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is16BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVHUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is16BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is32BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVWload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is32BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVWUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is64BitInt(t) || isPtr(t))
|
|
|
|
|
// result: (MOVDload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is64BitInt(t) || isPtr(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: is32BitFloat(t)
|
|
|
|
|
// result: (FMOVWload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitFloat(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVWload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: is64BitFloat(t)
|
|
|
|
|
// result: (FMOVDload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is64BitFloat(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVDload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLocalAddr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (LocalAddr {sym} base _)
|
|
|
|
|
// result: (MOVaddr {sym} base)
|
|
|
|
|
for {
|
|
|
|
|
sym := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
base := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVaddr)
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg(base)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh16x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh32x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh64x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh8x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod16 x y)
|
|
|
|
|
// result: (REMW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod16u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod16u x y)
|
|
|
|
|
// result: (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod8 x y)
|
|
|
|
|
// result: (REMW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod8u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod8u x y)
|
|
|
|
|
// result: (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMove(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
config := b.Func.Config
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Move [0] _ _ mem)
|
|
|
|
|
// result: mem
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_2
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [1] dst src mem)
|
|
|
|
|
// result: (MOVBstore dst (MOVBload src mem) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 1 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [2] dst src mem)
|
|
|
|
|
// result: (MOVHstore dst (MOVHload src mem) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 2 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [4] dst src mem)
|
|
|
|
|
// result: (MOVWstore dst (MOVWload src mem) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 4 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [8] dst src mem)
|
|
|
|
|
// result: (MOVDstore dst (MOVDload src mem) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 8 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [s] {t} dst src mem)
|
|
|
|
|
// result: (LoweredMove [t.(*types.Type).Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.(*types.Type).Alignment(), config)] src) mem)
|
|
|
|
|
for {
|
|
|
|
|
s := v.AuxInt
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64LoweredMove)
|
|
|
|
|
v.AuxInt = t.(*types.Type).Alignment()
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADDI, src.Type)
|
|
|
|
|
v0.AuxInt = s - moveSize(t.(*types.Type).Alignment(), config)
|
|
|
|
|
v0.AddArg(src)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg4(dst, src, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMul16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mul16 x y)
|
|
|
|
|
// result: (MULW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MULW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMul8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mul8 x y)
|
|
|
|
|
// result: (MULW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MULW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Neq16 x y)
|
|
|
|
|
// result: (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Neq32 x y)
|
2020-02-26 03:58:59 +11:00
|
|
|
// result: (SNEZ (SUBW <x.Type> x y))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
2020-02-26 03:58:59 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Neq64 x y)
|
|
|
|
|
// result: (SNEZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Neq8 x y)
|
|
|
|
|
// result: (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeqPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (NeqPtr x y)
|
|
|
|
|
// result: (SNEZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNot(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Not x)
|
|
|
|
|
// result: (XORI [1] x)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64XORI)
|
|
|
|
|
v.AuxInt = 1
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpOffPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (OffPtr [off] ptr:(SP))
|
|
|
|
|
// result: (MOVaddr [off] ptr)
|
|
|
|
|
for {
|
|
|
|
|
off := v.AuxInt
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if ptr.Op != OpSP {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVaddr)
|
|
|
|
|
v.AuxInt = off
|
2020-01-23 14:28:04 -08:00
|
|
|
v.AddArg(ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-01-23 14:28:04 -08:00
|
|
|
// match: (OffPtr [off] ptr)
|
|
|
|
|
// cond: is32Bit(off)
|
|
|
|
|
// result: (ADDI [off] ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-01-23 14:28:04 -08:00
|
|
|
off := v.AuxInt
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if !(is32Bit(off)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.AddArg(ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-01-23 14:28:04 -08:00
|
|
|
// match: (OffPtr [off] ptr)
|
|
|
|
|
// result: (ADD (MOVDconst [off]) ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-01-23 14:28:04 -08:00
|
|
|
off := v.AuxInt
|
|
|
|
|
ptr := v_0
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v0.AuxInt = off
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 0
|
|
|
|
|
// result: (LoweredPanicBoundsA [kind] x y mem)
|
|
|
|
|
for {
|
|
|
|
|
kind := v.AuxInt
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsA)
|
|
|
|
|
v.AuxInt = kind
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 1
|
|
|
|
|
// result: (LoweredPanicBoundsB [kind] x y mem)
|
|
|
|
|
for {
|
|
|
|
|
kind := v.AuxInt
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 1) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsB)
|
|
|
|
|
v.AuxInt = kind
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 2
|
|
|
|
|
// result: (LoweredPanicBoundsC [kind] x y mem)
|
|
|
|
|
for {
|
|
|
|
|
kind := v.AuxInt
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 2) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsC)
|
|
|
|
|
v.AuxInt = kind
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (ADD (MOVDconst [off]) ptr)
|
|
|
|
|
// cond: is32Bit(off)
|
|
|
|
|
// result: (ADDI [off] ptr)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
if v_0.Op != OpRISCV64MOVDconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
off := v_0.AuxInt
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_1
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
if !(is32Bit(off)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.AddArg(ptr)
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
break
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64ADDI(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (ADDI [c] (MOVaddr [d] {s} x))
|
|
|
|
|
// cond: is32Bit(c+d)
|
|
|
|
|
// result: (MOVaddr [c+d] {s} x)
|
|
|
|
|
for {
|
|
|
|
|
c := v.AuxInt
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
d := v_0.AuxInt
|
|
|
|
|
s := v_0.Aux
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
if !(is32Bit(c + d)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVaddr)
|
|
|
|
|
v.AuxInt = c + d
|
|
|
|
|
v.Aux = s
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (ADDI [0] x)
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBUload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVBUload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVBload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off1] {sym} (ADDI [off2] base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVBstore [off1+off2] {sym} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVBconst [0]) mem)
|
|
|
|
|
// result: (MOVBstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVBstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
|
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVBstorezero [off1+off2] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDconst(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (MOVDconst <t> [c])
|
|
|
|
|
// cond: !is32Bit(c) && int32(c) < 0
|
|
|
|
|
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+1])) (MOVDconst [int64(int32(c))]))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
c := v.AuxInt
|
|
|
|
|
if !(!is32Bit(c) && int32(c) < 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 32
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v1.AuxInt = c>>32 + 1
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v2.AuxInt = int64(int32(c))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDconst <t> [c])
|
|
|
|
|
// cond: !is32Bit(c) && int32(c) >= 0
|
|
|
|
|
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+0])) (MOVDconst [int64(int32(c))]))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
c := v.AuxInt
|
|
|
|
|
if !(!is32Bit(c) && int32(c) >= 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 32
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v1.AuxInt = c>>32 + 0
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v2.AuxInt = int64(int32(c))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVDload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDstore [off1] {sym} (ADDI [off2] base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVDstore [off1+off2] {sym} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
|
|
|
|
|
// result: (MOVDstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVDstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
|
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVDstorezero [off1+off2] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVHUload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVHload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstore [off1] {sym} (ADDI [off2] base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVHstore [off1+off2] {sym} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVHconst [0]) mem)
|
|
|
|
|
// result: (MOVHstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVHstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
|
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVHstorezero [off1+off2] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVWUload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWload [off1] {sym} (ADDI [off2] base) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVWload [off1+off2] {sym} base mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWstore [off1] {sym} (ADDI [off2] base) val mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVWstore [off1+off2] {sym} base val mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVWstore [off] {sym} ptr (MOVWconst [0]) mem)
|
|
|
|
|
// result: (MOVWstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
|
|
|
|
v.AuxInt = off
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVWstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
|
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym1 := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
sym2 := v_0.Aux
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
|
|
|
|
// cond: is32Bit(off1+off2)
|
|
|
|
|
// result: (MOVWstorezero [off1+off2] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
off1 := v.AuxInt
|
|
|
|
|
sym := v.Aux
|
|
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off2 := v_0.AuxInt
|
|
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(is32Bit(off1 + off2)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-03-02 04:23:12 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64SUB(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SUB x (MOVBconst [val]))
|
|
|
|
|
// cond: is32Bit(-val)
|
|
|
|
|
// result: (ADDI [-val] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
val := v_1.AuxInt
|
|
|
|
|
if !(is32Bit(-val)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = -val
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVHconst [val]))
|
|
|
|
|
// cond: is32Bit(-val)
|
|
|
|
|
// result: (ADDI [-val] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
val := v_1.AuxInt
|
|
|
|
|
if !(is32Bit(-val)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = -val
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVWconst [val]))
|
|
|
|
|
// cond: is32Bit(-val)
|
|
|
|
|
// result: (ADDI [-val] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
val := v_1.AuxInt
|
|
|
|
|
if !(is32Bit(-val)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = -val
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVDconst [val]))
|
|
|
|
|
// cond: is32Bit(-val)
|
|
|
|
|
// result: (ADDI [-val] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
val := v_1.AuxInt
|
|
|
|
|
if !(is32Bit(-val)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
|
|
|
|
v.AuxInt = -val
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:24:35 +11:00
|
|
|
// match: (SUB x (MOVBconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVHconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVWconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVDconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-03 03:45:22 +11:00
|
|
|
// match: (SUB (MOVBconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVBconst || v_0.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVHconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVHconst || v_0.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVWconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVWconst || v_0.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVDconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVDconst || v_0.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:24:35 +11:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64SUBW(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SUBW x (MOVWconst [0]))
|
|
|
|
|
// result: (ADDIW [0] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst || v_1.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDIW)
|
|
|
|
|
v.AuxInt = 0
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-03 03:45:22 +11:00
|
|
|
// match: (SUBW (MOVDconst [0]) x)
|
|
|
|
|
// result: (NEGW x)
|
|
|
|
|
for {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVDconst || v_0.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEGW)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:23:12 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft16 <t> x (MOVHconst [c]))
|
|
|
|
|
// result: (Or16 (Lsh16x64 <t> x (MOVHconst [c&15])) (Rsh16Ux64 <t> x (MOVHconst [-c&15])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
v.reset(OpOr16)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
|
|
|
|
v1.AuxInt = c & 15
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh16Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
|
|
|
|
v3.AuxInt = -c & 15
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft32 <t> x (MOVWconst [c]))
|
|
|
|
|
// result: (Or32 (Lsh32x64 <t> x (MOVWconst [c&31])) (Rsh32Ux64 <t> x (MOVWconst [-c&31])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
v.reset(OpOr32)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
|
|
|
|
v1.AuxInt = c & 31
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
|
|
|
|
v3.AuxInt = -c & 31
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft64 <t> x (MOVDconst [c]))
|
|
|
|
|
// result: (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
v.reset(OpOr64)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v1.AuxInt = c & 63
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v3.AuxInt = -c & 63
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft8 <t> x (MOVBconst [c]))
|
|
|
|
|
// result: (Or8 (Lsh8x64 <t> x (MOVBconst [c&7])) (Rsh8Ux64 <t> x (MOVBconst [-c&7])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
v.reset(OpOr8)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh8x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
|
|
|
|
v1.AuxInt = c & 7
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh8Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
|
|
|
|
v3.AuxInt = -c & 7
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Rsh64Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v1.AuxInt = -1
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v1.AuxInt = -1
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Rsh64x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v1.AuxInt = -1
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v1.AuxInt = -1
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
|
|
|
|
v2.AuxInt = -1
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
|
|
|
|
v3.AuxInt = 64
|
|
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt16to32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (SignExt16to32 <t> x)
|
|
|
|
|
// result: (SRAI [48] (SLLI <t> [48] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 48
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 48
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt16to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (SignExt16to64 <t> x)
|
|
|
|
|
// result: (SRAI [48] (SLLI <t> [48] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 48
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 48
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt32to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (SignExt32to64 <t> x)
|
2020-02-26 04:00:10 +11:00
|
|
|
// result: (ADDIW [0] x)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2020-02-26 04:00:10 +11:00
|
|
|
v.reset(OpRISCV64ADDIW)
|
|
|
|
|
v.AuxInt = 0
|
|
|
|
|
v.AddArg(x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt8to16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (SignExt8to16 <t> x)
|
|
|
|
|
// result: (SRAI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt8to32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (SignExt8to32 <t> x)
|
|
|
|
|
// result: (SRAI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSignExt8to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (SignExt8to64 <t> x)
|
|
|
|
|
// result: (SRAI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSlicemask(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Slicemask <t> x)
|
2020-03-16 03:29:19 +11:00
|
|
|
// result: (NOT (SRAI <t> [63] (ADDI <t> [-1] x)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2020-03-16 03:29:19 +11:00
|
|
|
v.reset(OpRISCV64NOT)
|
2020-02-26 04:01:29 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRAI, t)
|
|
|
|
|
v0.AuxInt = 63
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, t)
|
|
|
|
|
v1.AuxInt = -1
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v0.AddArg(v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpStore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 1
|
|
|
|
|
// result: (MOVBstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 1) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 2
|
|
|
|
|
// result: (MOVHstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 2) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type)
|
|
|
|
|
// result: (MOVWstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type)
|
|
|
|
|
// result: (MOVDstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 4 && is32BitFloat(val.Type)
|
|
|
|
|
// result: (FMOVWstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 4 && is32BitFloat(val.Type)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVWstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
|
|
|
|
// cond: t.(*types.Type).Size() == 8 && is64BitFloat(val.Type)
|
|
|
|
|
// result: (FMOVDstore ptr val mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.(*types.Type).Size() == 8 && is64BitFloat(val.Type)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVDstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZero(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
config := b.Func.Config
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Zero [0] _ mem)
|
|
|
|
|
// result: mem
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [1] ptr mem)
|
|
|
|
|
// result: (MOVBstore ptr (MOVBconst) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 1 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [2] ptr mem)
|
|
|
|
|
// result: (MOVHstore ptr (MOVHconst) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 2 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [4] ptr mem)
|
|
|
|
|
// result: (MOVWstore ptr (MOVWconst) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 4 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [8] ptr mem)
|
|
|
|
|
// result: (MOVDstore ptr (MOVDconst) mem)
|
|
|
|
|
for {
|
|
|
|
|
if v.AuxInt != 8 {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [s] {t} ptr mem)
|
|
|
|
|
// result: (LoweredZero [t.(*types.Type).Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.(*types.Type).Alignment(), config)])) mem)
|
|
|
|
|
for {
|
|
|
|
|
s := v.AuxInt
|
|
|
|
|
t := v.Aux
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64LoweredZero)
|
|
|
|
|
v.AuxInt = t.(*types.Type).Alignment()
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
|
|
|
|
v1.AuxInt = s - moveSize(t.(*types.Type).Alignment(), config)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(ptr, v1)
|
|
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt16to32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt16to32 <t> x)
|
|
|
|
|
// result: (SRLI [48] (SLLI <t> [48] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 48
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 48
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt16to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt16to64 <t> x)
|
|
|
|
|
// result: (SRLI [48] (SLLI <t> [48] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 48
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 48
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt32to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt32to64 <t> x)
|
|
|
|
|
// result: (SRLI [32] (SLLI <t> [32] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 32
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 32
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt8to16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt8to16 <t> x)
|
|
|
|
|
// result: (SRLI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt8to32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt8to32 <t> x)
|
|
|
|
|
// result: (SRLI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZeroExt8to64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (ZeroExt8to64 <t> x)
|
|
|
|
|
// result: (SRLI [56] (SLLI <t> [56] x))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
|
|
|
|
v.AuxInt = 56
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
|
|
|
|
v0.AuxInt = 56
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
func rewriteBlockRISCV64(b *Block) bool {
|
|
|
|
|
switch b.Kind {
|
2020-03-02 04:25:54 +11:00
|
|
|
case BlockRISCV64BNE:
|
|
|
|
|
// match: (BNE (SNEZ x) yes no)
|
|
|
|
|
// result: (BNE x yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SNEZ {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BNE, x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
case BlockIf:
|
|
|
|
|
// match: (If cond yes no)
|
|
|
|
|
// result: (BNE cond yes no)
|
|
|
|
|
for {
|
|
|
|
|
cond := b.Controls[0]
|
2020-03-01 13:09:09 -08:00
|
|
|
b.resetWithControl(BlockRISCV64BNE, cond)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|