// Code generated from gen/ARM64.rules; DO NOT EDIT.
// generated with: cd gen; go run *.go

package ssa

import "math"
import "cmd/internal/obj"
import "cmd/internal/objabi"

var _ = math.MinInt8  // in case not otherwise used
var _ = obj.ANOP      // in case not otherwise used
var _ = objabi.GOROOT // in case not otherwise used
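
// rewriteValueARM64 dispatches v to the rewrite rules generated for its
// opcode; it reports whether any rule matched and rewrote v.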
func rewriteValueARM64(v *Value) bool {
	switch v.Op {
	case OpARM64ADD:
		return rewriteValueARM64_OpARM64ADD_0(v)
	case OpARM64ADDconst:
		return rewriteValueARM64_OpARM64ADDconst_0(v)
	case OpARM64ADDshiftLL:
		return rewriteValueARM64_OpARM64ADDshiftLL_0(v)
	case OpARM64ADDshiftRA:
		return rewriteValueARM64_OpARM64ADDshiftRA_0(v)
	case OpARM64ADDshiftRL:
		return rewriteValueARM64_OpARM64ADDshiftRL_0(v)
	case OpARM64AND:
		return rewriteValueARM64_OpARM64AND_0(v) || rewriteValueARM64_OpARM64AND_10(v)
	case OpARM64ANDconst:
		return rewriteValueARM64_OpARM64ANDconst_0(v)
	case OpARM64ANDshiftLL:
		return rewriteValueARM64_OpARM64ANDshiftLL_0(v)
	case OpARM64ANDshiftRA:
		return rewriteValueARM64_OpARM64ANDshiftRA_0(v)
	case OpARM64ANDshiftRL:
		return rewriteValueARM64_OpARM64ANDshiftRL_0(v)
	case OpARM64BIC:
		return rewriteValueARM64_OpARM64BIC_0(v)
	case OpARM64BICconst:
		return rewriteValueARM64_OpARM64BICconst_0(v)
	case OpARM64BICshiftLL:
		return rewriteValueARM64_OpARM64BICshiftLL_0(v)
	case OpARM64BICshiftRA:
		return rewriteValueARM64_OpARM64BICshiftRA_0(v)
	case OpARM64BICshiftRL:
		return rewriteValueARM64_OpARM64BICshiftRL_0(v)
	case OpARM64CMP:
		return rewriteValueARM64_OpARM64CMP_0(v)
	case OpARM64CMPW:
		return rewriteValueARM64_OpARM64CMPW_0(v)
	case OpARM64CMPWconst:
		return rewriteValueARM64_OpARM64CMPWconst_0(v)
	case OpARM64CMPconst:
		return rewriteValueARM64_OpARM64CMPconst_0(v)
	case OpARM64CMPshiftLL:
		return rewriteValueARM64_OpARM64CMPshiftLL_0(v)
	case OpARM64CMPshiftRA:
		return rewriteValueARM64_OpARM64CMPshiftRA_0(v)
	case OpARM64CMPshiftRL:
		return rewriteValueARM64_OpARM64CMPshiftRL_0(v)
	case OpARM64CSELULT:
		return rewriteValueARM64_OpARM64CSELULT_0(v)
	case OpARM64CSELULT0:
		return rewriteValueARM64_OpARM64CSELULT0_0(v)
	case OpARM64DIV:
		return rewriteValueARM64_OpARM64DIV_0(v)
	case OpARM64DIVW:
		return rewriteValueARM64_OpARM64DIVW_0(v)
	case OpARM64Equal:
		return rewriteValueARM64_OpARM64Equal_0(v)
	case OpARM64FMOVDload:
		return rewriteValueARM64_OpARM64FMOVDload_0(v)
	case OpARM64FMOVDstore:
		return rewriteValueARM64_OpARM64FMOVDstore_0(v)
	case OpARM64FMOVSload:
		return rewriteValueARM64_OpARM64FMOVSload_0(v)
	case OpARM64FMOVSstore:
		return rewriteValueARM64_OpARM64FMOVSstore_0(v)
	case OpARM64GreaterEqual:
		return rewriteValueARM64_OpARM64GreaterEqual_0(v)
	case OpARM64GreaterEqualU:
		return rewriteValueARM64_OpARM64GreaterEqualU_0(v)
	case OpARM64GreaterThan:
		return rewriteValueARM64_OpARM64GreaterThan_0(v)
	case OpARM64GreaterThanU:
		return rewriteValueARM64_OpARM64GreaterThanU_0(v)
	case OpARM64LessEqual:
		return rewriteValueARM64_OpARM64LessEqual_0(v)
	case OpARM64LessEqualU:
		return rewriteValueARM64_OpARM64LessEqualU_0(v)
	case OpARM64LessThan:
		return rewriteValueARM64_OpARM64LessThan_0(v)
	case OpARM64LessThanU:
		return rewriteValueARM64_OpARM64LessThanU_0(v)
	case OpARM64MOD:
		return rewriteValueARM64_OpARM64MOD_0(v)
	case OpARM64MODW:
		return rewriteValueARM64_OpARM64MODW_0(v)
	case OpARM64MOVBUload:
		return rewriteValueARM64_OpARM64MOVBUload_0(v)
	case OpARM64MOVBUreg:
		return rewriteValueARM64_OpARM64MOVBUreg_0(v)
	case OpARM64MOVBload:
		return rewriteValueARM64_OpARM64MOVBload_0(v)
	case OpARM64MOVBreg:
		return rewriteValueARM64_OpARM64MOVBreg_0(v)
	case OpARM64MOVBstore:
		return rewriteValueARM64_OpARM64MOVBstore_0(v)
	case OpARM64MOVBstorezero:
		return rewriteValueARM64_OpARM64MOVBstorezero_0(v)
	case OpARM64MOVDload:
		return rewriteValueARM64_OpARM64MOVDload_0(v)
	case OpARM64MOVDreg:
		return rewriteValueARM64_OpARM64MOVDreg_0(v)
	case OpARM64MOVDstore:
		return rewriteValueARM64_OpARM64MOVDstore_0(v)
	case OpARM64MOVDstorezero:
		return rewriteValueARM64_OpARM64MOVDstorezero_0(v)
	case OpARM64MOVHUload:
		return rewriteValueARM64_OpARM64MOVHUload_0(v)
	case OpARM64MOVHUreg:
		return rewriteValueARM64_OpARM64MOVHUreg_0(v)
	case OpARM64MOVHload:
		return rewriteValueARM64_OpARM64MOVHload_0(v)
	case OpARM64MOVHreg:
		return rewriteValueARM64_OpARM64MOVHreg_0(v)
	case OpARM64MOVHstore:
		return rewriteValueARM64_OpARM64MOVHstore_0(v)
	case OpARM64MOVHstorezero:
		return rewriteValueARM64_OpARM64MOVHstorezero_0(v)
	case OpARM64MOVWUload:
		return rewriteValueARM64_OpARM64MOVWUload_0(v)
	case OpARM64MOVWUreg:
		return rewriteValueARM64_OpARM64MOVWUreg_0(v)
	case OpARM64MOVWload:
		return rewriteValueARM64_OpARM64MOVWload_0(v)
	case OpARM64MOVWreg:
		return rewriteValueARM64_OpARM64MOVWreg_0(v) || rewriteValueARM64_OpARM64MOVWreg_10(v)
	case OpARM64MOVWstore:
		return rewriteValueARM64_OpARM64MOVWstore_0(v)
	case OpARM64MOVWstorezero:
		return rewriteValueARM64_OpARM64MOVWstorezero_0(v)
	case OpARM64MUL:
		return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v)
	case OpARM64MULW:
		return rewriteValueARM64_OpARM64MULW_0(v) || rewriteValueARM64_OpARM64MULW_10(v) || rewriteValueARM64_OpARM64MULW_20(v)
	case OpARM64MVN:
		return rewriteValueARM64_OpARM64MVN_0(v)
	case OpARM64NEG:
		return rewriteValueARM64_OpARM64NEG_0(v)
	case OpARM64NotEqual:
		return rewriteValueARM64_OpARM64NotEqual_0(v)
	case OpARM64OR:
		return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v)
	case OpARM64ORconst:
		return rewriteValueARM64_OpARM64ORconst_0(v)
	case OpARM64ORshiftLL:
		return rewriteValueARM64_OpARM64ORshiftLL_0(v) || rewriteValueARM64_OpARM64ORshiftLL_10(v)
	case OpARM64ORshiftRA:
		return rewriteValueARM64_OpARM64ORshiftRA_0(v)
	case OpARM64ORshiftRL:
		return rewriteValueARM64_OpARM64ORshiftRL_0(v)
	case OpARM64SLL:
		return rewriteValueARM64_OpARM64SLL_0(v)
	case OpARM64SLLconst:
		return rewriteValueARM64_OpARM64SLLconst_0(v)
	case OpARM64SRA:
		return rewriteValueARM64_OpARM64SRA_0(v)
	case OpARM64SRAconst:
		return rewriteValueARM64_OpARM64SRAconst_0(v)
	case OpARM64SRL:
		return rewriteValueARM64_OpARM64SRL_0(v)
	case OpARM64SRLconst:
		return rewriteValueARM64_OpARM64SRLconst_0(v)
	case OpARM64SUB:
		return rewriteValueARM64_OpARM64SUB_0(v)
	case OpARM64SUBconst:
		return rewriteValueARM64_OpARM64SUBconst_0(v)
	case OpARM64SUBshiftLL:
		return rewriteValueARM64_OpARM64SUBshiftLL_0(v)
	case OpARM64SUBshiftRA:
		return rewriteValueARM64_OpARM64SUBshiftRA_0(v)
	case OpARM64SUBshiftRL:
		return rewriteValueARM64_OpARM64SUBshiftRL_0(v)
	case OpARM64UDIV:
		return rewriteValueARM64_OpARM64UDIV_0(v)
	case OpARM64UDIVW:
		return rewriteValueARM64_OpARM64UDIVW_0(v)
	case OpARM64UMOD:
		return rewriteValueARM64_OpARM64UMOD_0(v)
	case OpARM64UMODW:
		return rewriteValueARM64_OpARM64UMODW_0(v)
	case OpARM64XOR:
		return rewriteValueARM64_OpARM64XOR_0(v)
	case OpARM64XORconst:
		return rewriteValueARM64_OpARM64XORconst_0(v)
	case OpARM64XORshiftLL:
		return rewriteValueARM64_OpARM64XORshiftLL_0(v)
	case OpARM64XORshiftRA:
		return rewriteValueARM64_OpARM64XORshiftRA_0(v)
	case OpARM64XORshiftRL:
		return rewriteValueARM64_OpARM64XORshiftRL_0(v)
	case OpAdd16:
		return rewriteValueARM64_OpAdd16_0(v)
	case OpAdd32:
		return rewriteValueARM64_OpAdd32_0(v)
	case OpAdd32F:
		return rewriteValueARM64_OpAdd32F_0(v)
	case OpAdd64:
		return rewriteValueARM64_OpAdd64_0(v)
	case OpAdd64F:
		return rewriteValueARM64_OpAdd64F_0(v)
	case OpAdd8:
		return rewriteValueARM64_OpAdd8_0(v)
	case OpAddPtr:
		return rewriteValueARM64_OpAddPtr_0(v)
	case OpAddr:
		return rewriteValueARM64_OpAddr_0(v)
	case OpAnd16:
		return rewriteValueARM64_OpAnd16_0(v)
	case OpAnd32:
		return rewriteValueARM64_OpAnd32_0(v)
	case OpAnd64:
		return rewriteValueARM64_OpAnd64_0(v)
	case OpAnd8:
		return rewriteValueARM64_OpAnd8_0(v)
	case OpAndB:
		return rewriteValueARM64_OpAndB_0(v)
	case OpAtomicAdd32:
		return rewriteValueARM64_OpAtomicAdd32_0(v)
	case OpAtomicAdd64:
		return rewriteValueARM64_OpAtomicAdd64_0(v)
	case OpAtomicAnd8:
		return rewriteValueARM64_OpAtomicAnd8_0(v)
	case OpAtomicCompareAndSwap32:
		return rewriteValueARM64_OpAtomicCompareAndSwap32_0(v)
	case OpAtomicCompareAndSwap64:
		return rewriteValueARM64_OpAtomicCompareAndSwap64_0(v)
	case OpAtomicExchange32:
		return rewriteValueARM64_OpAtomicExchange32_0(v)
	case OpAtomicExchange64:
		return rewriteValueARM64_OpAtomicExchange64_0(v)
	case OpAtomicLoad32:
		return rewriteValueARM64_OpAtomicLoad32_0(v)
	case OpAtomicLoad64:
		return rewriteValueARM64_OpAtomicLoad64_0(v)
	case OpAtomicLoadPtr:
		return rewriteValueARM64_OpAtomicLoadPtr_0(v)
	case OpAtomicOr8:
		return rewriteValueARM64_OpAtomicOr8_0(v)
	case OpAtomicStore32:
		return rewriteValueARM64_OpAtomicStore32_0(v)
	case OpAtomicStore64:
		return rewriteValueARM64_OpAtomicStore64_0(v)
	case OpAtomicStorePtrNoWB:
		return rewriteValueARM64_OpAtomicStorePtrNoWB_0(v)
	case OpAvg64u:
		return rewriteValueARM64_OpAvg64u_0(v)
	case OpBitLen64:
		return rewriteValueARM64_OpBitLen64_0(v)
	case OpBitRev16:
		return rewriteValueARM64_OpBitRev16_0(v)
	case OpBitRev32:
		return rewriteValueARM64_OpBitRev32_0(v)
	case OpBitRev64:
		return rewriteValueARM64_OpBitRev64_0(v)
	case OpBitRev8:
		return rewriteValueARM64_OpBitRev8_0(v)
	case OpBswap32:
		return rewriteValueARM64_OpBswap32_0(v)
	case OpBswap64:
		return rewriteValueARM64_OpBswap64_0(v)
	case OpClosureCall:
		return rewriteValueARM64_OpClosureCall_0(v)
	case OpCom16:
		return rewriteValueARM64_OpCom16_0(v)
	case OpCom32:
		return rewriteValueARM64_OpCom32_0(v)
	case OpCom64:
		return rewriteValueARM64_OpCom64_0(v)
	case OpCom8:
		return rewriteValueARM64_OpCom8_0(v)
	case OpConst16:
		return rewriteValueARM64_OpConst16_0(v)
	case OpConst32:
		return rewriteValueARM64_OpConst32_0(v)
	case OpConst32F:
		return rewriteValueARM64_OpConst32F_0(v)
	case OpConst64:
		return rewriteValueARM64_OpConst64_0(v)
	case OpConst64F:
		return rewriteValueARM64_OpConst64F_0(v)
	case OpConst8:
		return rewriteValueARM64_OpConst8_0(v)
	case OpConstBool:
		return rewriteValueARM64_OpConstBool_0(v)
	case OpConstNil:
		return rewriteValueARM64_OpConstNil_0(v)
	case OpConvert:
		return rewriteValueARM64_OpConvert_0(v)
	case OpCtz32:
		return rewriteValueARM64_OpCtz32_0(v)
	case OpCtz64:
		return rewriteValueARM64_OpCtz64_0(v)
	case OpCvt32Fto32:
		return rewriteValueARM64_OpCvt32Fto32_0(v)
	case OpCvt32Fto32U:
		return rewriteValueARM64_OpCvt32Fto32U_0(v)
	case OpCvt32Fto64:
		return rewriteValueARM64_OpCvt32Fto64_0(v)
	case OpCvt32Fto64F:
		return rewriteValueARM64_OpCvt32Fto64F_0(v)
	case OpCvt32Fto64U:
		return rewriteValueARM64_OpCvt32Fto64U_0(v)
	case OpCvt32Uto32F:
		return rewriteValueARM64_OpCvt32Uto32F_0(v)
	case OpCvt32Uto64F:
		return rewriteValueARM64_OpCvt32Uto64F_0(v)
	case OpCvt32to32F:
		return rewriteValueARM64_OpCvt32to32F_0(v)
	case OpCvt32to64F:
		return rewriteValueARM64_OpCvt32to64F_0(v)
	case OpCvt64Fto32:
		return rewriteValueARM64_OpCvt64Fto32_0(v)
	case OpCvt64Fto32F:
		return rewriteValueARM64_OpCvt64Fto32F_0(v)
	case OpCvt64Fto32U:
		return rewriteValueARM64_OpCvt64Fto32U_0(v)
	case OpCvt64Fto64:
		return rewriteValueARM64_OpCvt64Fto64_0(v)
	case OpCvt64Fto64U:
		return rewriteValueARM64_OpCvt64Fto64U_0(v)
	case OpCvt64Uto32F:
		return rewriteValueARM64_OpCvt64Uto32F_0(v)
	case OpCvt64Uto64F:
		return rewriteValueARM64_OpCvt64Uto64F_0(v)
	case OpCvt64to32F:
		return rewriteValueARM64_OpCvt64to32F_0(v)
	case OpCvt64to64F:
		return rewriteValueARM64_OpCvt64to64F_0(v)
	case OpDiv16:
		return rewriteValueARM64_OpDiv16_0(v)
	case OpDiv16u:
		return rewriteValueARM64_OpDiv16u_0(v)
	case OpDiv32:
		return rewriteValueARM64_OpDiv32_0(v)
	case OpDiv32F:
		return rewriteValueARM64_OpDiv32F_0(v)
	case OpDiv32u:
		return rewriteValueARM64_OpDiv32u_0(v)
	case OpDiv64:
		return rewriteValueARM64_OpDiv64_0(v)
	case OpDiv64F:
		return rewriteValueARM64_OpDiv64F_0(v)
	case OpDiv64u:
		return rewriteValueARM64_OpDiv64u_0(v)
	case OpDiv8:
		return rewriteValueARM64_OpDiv8_0(v)
	case OpDiv8u:
		return rewriteValueARM64_OpDiv8u_0(v)
	case OpEq16:
		return rewriteValueARM64_OpEq16_0(v)
	case OpEq32:
		return rewriteValueARM64_OpEq32_0(v)
	case OpEq32F:
		return rewriteValueARM64_OpEq32F_0(v)
	case OpEq64:
		return rewriteValueARM64_OpEq64_0(v)
	case OpEq64F:
		return rewriteValueARM64_OpEq64F_0(v)
	case OpEq8:
		return rewriteValueARM64_OpEq8_0(v)
	case OpEqB:
		return rewriteValueARM64_OpEqB_0(v)
	case OpEqPtr:
		return rewriteValueARM64_OpEqPtr_0(v)
	case OpGeq16:
		return rewriteValueARM64_OpGeq16_0(v)
	case OpGeq16U:
		return rewriteValueARM64_OpGeq16U_0(v)
	case OpGeq32:
		return rewriteValueARM64_OpGeq32_0(v)
	case OpGeq32F:
		return rewriteValueARM64_OpGeq32F_0(v)
	case OpGeq32U:
		return rewriteValueARM64_OpGeq32U_0(v)
	case OpGeq64:
		return rewriteValueARM64_OpGeq64_0(v)
	case OpGeq64F:
		return rewriteValueARM64_OpGeq64F_0(v)
	case OpGeq64U:
		return rewriteValueARM64_OpGeq64U_0(v)
	case OpGeq8:
		return rewriteValueARM64_OpGeq8_0(v)
	case OpGeq8U:
		return rewriteValueARM64_OpGeq8U_0(v)
	case OpGetClosurePtr:
		return rewriteValueARM64_OpGetClosurePtr_0(v)
	case OpGreater16:
		return rewriteValueARM64_OpGreater16_0(v)
	case OpGreater16U:
		return rewriteValueARM64_OpGreater16U_0(v)
	case OpGreater32:
		return rewriteValueARM64_OpGreater32_0(v)
	case OpGreater32F:
		return rewriteValueARM64_OpGreater32F_0(v)
	case OpGreater32U:
		return rewriteValueARM64_OpGreater32U_0(v)
	case OpGreater64:
		return rewriteValueARM64_OpGreater64_0(v)
	case OpGreater64F:
		return rewriteValueARM64_OpGreater64F_0(v)
	case OpGreater64U:
		return rewriteValueARM64_OpGreater64U_0(v)
	case OpGreater8:
		return rewriteValueARM64_OpGreater8_0(v)
	case OpGreater8U:
		return rewriteValueARM64_OpGreater8U_0(v)
	case OpHmul32:
		return rewriteValueARM64_OpHmul32_0(v)
	case OpHmul32u:
		return rewriteValueARM64_OpHmul32u_0(v)
	case OpHmul64:
		return rewriteValueARM64_OpHmul64_0(v)
	case OpHmul64u:
		return rewriteValueARM64_OpHmul64u_0(v)
	case OpInterCall:
		return rewriteValueARM64_OpInterCall_0(v)
	case OpIsInBounds:
		return rewriteValueARM64_OpIsInBounds_0(v)
	case OpIsNonNil:
		return rewriteValueARM64_OpIsNonNil_0(v)
	case OpIsSliceInBounds:
		return rewriteValueARM64_OpIsSliceInBounds_0(v)
	case OpLeq16:
		return rewriteValueARM64_OpLeq16_0(v)
	case OpLeq16U:
		return rewriteValueARM64_OpLeq16U_0(v)
	case OpLeq32:
		return rewriteValueARM64_OpLeq32_0(v)
	case OpLeq32F:
		return rewriteValueARM64_OpLeq32F_0(v)
	case OpLeq32U:
		return rewriteValueARM64_OpLeq32U_0(v)
	case OpLeq64:
		return rewriteValueARM64_OpLeq64_0(v)
	case OpLeq64F:
		return rewriteValueARM64_OpLeq64F_0(v)
	case OpLeq64U:
		return rewriteValueARM64_OpLeq64U_0(v)
	case OpLeq8:
		return rewriteValueARM64_OpLeq8_0(v)
	case OpLeq8U:
		return rewriteValueARM64_OpLeq8U_0(v)
	case OpLess16:
		return rewriteValueARM64_OpLess16_0(v)
	case OpLess16U:
		return rewriteValueARM64_OpLess16U_0(v)
	case OpLess32:
		return rewriteValueARM64_OpLess32_0(v)
	case OpLess32F:
		return rewriteValueARM64_OpLess32F_0(v)
	case OpLess32U:
		return rewriteValueARM64_OpLess32U_0(v)
	case OpLess64:
		return rewriteValueARM64_OpLess64_0(v)
	case OpLess64F:
		return rewriteValueARM64_OpLess64F_0(v)
	case OpLess64U:
		return rewriteValueARM64_OpLess64U_0(v)
	case OpLess8:
		return rewriteValueARM64_OpLess8_0(v)
	case OpLess8U:
		return rewriteValueARM64_OpLess8U_0(v)
	case OpLoad:
		return rewriteValueARM64_OpLoad_0(v)
	case OpLsh16x16:
		return rewriteValueARM64_OpLsh16x16_0(v)
	case OpLsh16x32:
		return rewriteValueARM64_OpLsh16x32_0(v)
	case OpLsh16x64:
		return rewriteValueARM64_OpLsh16x64_0(v)
	case OpLsh16x8:
		return rewriteValueARM64_OpLsh16x8_0(v)
	case OpLsh32x16:
		return rewriteValueARM64_OpLsh32x16_0(v)
	case OpLsh32x32:
		return rewriteValueARM64_OpLsh32x32_0(v)
	case OpLsh32x64:
		return rewriteValueARM64_OpLsh32x64_0(v)
	case OpLsh32x8:
		return rewriteValueARM64_OpLsh32x8_0(v)
	case OpLsh64x16:
		return rewriteValueARM64_OpLsh64x16_0(v)
	case OpLsh64x32:
		return rewriteValueARM64_OpLsh64x32_0(v)
	case OpLsh64x64:
		return rewriteValueARM64_OpLsh64x64_0(v)
	case OpLsh64x8:
		return rewriteValueARM64_OpLsh64x8_0(v)
	case OpLsh8x16:
		return rewriteValueARM64_OpLsh8x16_0(v)
	case OpLsh8x32:
		return rewriteValueARM64_OpLsh8x32_0(v)
	case OpLsh8x64:
		return rewriteValueARM64_OpLsh8x64_0(v)
	case OpLsh8x8:
		return rewriteValueARM64_OpLsh8x8_0(v)
	case OpMod16:
		return rewriteValueARM64_OpMod16_0(v)
	case OpMod16u:
		return rewriteValueARM64_OpMod16u_0(v)
	case OpMod32:
		return rewriteValueARM64_OpMod32_0(v)
	case OpMod32u:
		return rewriteValueARM64_OpMod32u_0(v)
	case OpMod64:
		return rewriteValueARM64_OpMod64_0(v)
	case OpMod64u:
		return rewriteValueARM64_OpMod64u_0(v)
	case OpMod8:
		return rewriteValueARM64_OpMod8_0(v)
	case OpMod8u:
		return rewriteValueARM64_OpMod8u_0(v)
	case OpMove:
		return rewriteValueARM64_OpMove_0(v) || rewriteValueARM64_OpMove_10(v)
	case OpMul16:
		return rewriteValueARM64_OpMul16_0(v)
	case OpMul32:
		return rewriteValueARM64_OpMul32_0(v)
	case OpMul32F:
		return rewriteValueARM64_OpMul32F_0(v)
	case OpMul64:
		return rewriteValueARM64_OpMul64_0(v)
	case OpMul64F:
		return rewriteValueARM64_OpMul64F_0(v)
	case OpMul8:
		return rewriteValueARM64_OpMul8_0(v)
	case OpNeg16:
		return rewriteValueARM64_OpNeg16_0(v)
	case OpNeg32:
		return rewriteValueARM64_OpNeg32_0(v)
	case OpNeg32F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeg32F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeg64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeg64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeg64F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeg64F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeg8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeg8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq32F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq32F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq64F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq64F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeq8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeq8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeqB:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeqB_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNeqPtr:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNeqPtr_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNilCheck:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNilCheck_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpNot:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpNot_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOffPtr:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOffPtr_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOr16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOr16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOr32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOr32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOr64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOr64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOr8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOr8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpOrB:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpOrB_0(v)
|
2017-02-12 22:12:12 -05:00
|
|
|
case OpRound32F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRound32F_0(v)
|
2017-02-12 22:12:12 -05:00
|
|
|
case OpRound64F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRound64F_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16Ux16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16Ux16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16Ux32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16Ux32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16Ux64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16Ux64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16Ux8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16Ux8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16x16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16x16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16x32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16x32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16x64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16x64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh16x8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh16x8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32Ux16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32Ux16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32Ux32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32Ux32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32Ux64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32Ux64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32Ux8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32Ux8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32x16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32x16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32x32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32x32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32x64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32x64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh32x8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh32x8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64Ux16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64Ux16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64Ux32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64Ux32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64Ux64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64Ux64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64Ux8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64Ux8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64x16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64x16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64x32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64x32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64x64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64x64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh64x8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh64x8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8Ux16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8Ux16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8Ux32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8Ux32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8Ux64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8Ux64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8Ux8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8Ux8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8x16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8x16_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8x32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8x32_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8x64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8x64_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpRsh8x8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpRsh8x8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt16to32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt16to32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt16to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt16to64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt32to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt32to64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt8to16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt8to16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt8to32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt8to32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSignExt8to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSignExt8to64_0(v)
|
2016-10-25 15:49:52 -07:00
|
|
|
case OpSlicemask:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSlicemask_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpSqrt:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSqrt_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpStaticCall:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpStaticCall_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpStore:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpStore_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub32F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub32F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub64F:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub64F_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSub8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSub8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpSubPtr:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpSubPtr_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc16to8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc16to8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc32to16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc32to16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc32to8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc32to8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc64to16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc64to16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc64to32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc64to32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpTrunc64to8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpTrunc64to8_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpXor16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpXor16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpXor32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpXor32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpXor64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpXor64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpXor8:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpXor8_0(v)
|
2016-07-22 06:41:14 -04:00
|
|
|
case OpZero:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZero_0(v) || rewriteValueARM64_OpZero_10(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt16to32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt16to32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt16to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt16to64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt32to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt32to64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt8to16:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt8to16_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt8to32:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt8to32_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
case OpZeroExt8to64:
|
2017-04-20 15:47:06 -07:00
|
|
|
return rewriteValueARM64_OpZeroExt8to64_0(v)
|
2016-07-21 12:42:49 -04:00
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
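// rewriteValueARM64_OpARM64ADD_0 rewrites ADD values. A constant operand is
// folded into ADDconst, (ADD x (NEG y)) strength-reduces to (SUB x y), and a
// shift-by-constant operand (SLLconst, SRLconst, SRAconst) is merged into the
// fused ADDshiftLL, ADDshiftRL, or ADDshiftRA op. Rules are tried in order,
// and the function returns true as soon as one fires.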
func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
	// match: (ADD x (MOVDconst [c]))
	// cond:
	// result: (ADDconst [c] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ADDconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (ADD (MOVDconst [c]) x)
	// cond:
	// result: (ADDconst [c] x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (ADD x (NEG y))
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64NEG {
			break
		}
		y := v_1.Args[0]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD (NEG y) x)
	// cond:
	// result: (SUB x y)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64NEG {
			break
		}
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD x (SLLconst [c] y))
	// cond:
	// result: (ADDshiftLL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD (SLLconst [c] y) x)
	// cond:
	// result: (ADDshiftLL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD x (SRLconst [c] y))
	// cond:
	// result: (ADDshiftRL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ADDshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD (SRLconst [c] y) x)
	// cond:
	// result: (ADDshiftRL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ADDshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD x (SRAconst [c] y))
	// cond:
	// result: (ADDshiftRA x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ADDshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (ADD (SRAconst [c] y) x)
	// cond:
	// result: (ADDshiftRA x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRAconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ADDshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
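// rewriteValueARM64_OpARM64ADDconst_0 simplifies ADDconst values: the
// constant offset is folded into a MOVDaddr, (ADDconst [0] x) reduces to x,
// and constant chains collapse at compile time, e.g.
// (ADDconst [c] (ADDconst [d] x)) becomes (ADDconst [c+d] x).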
func rewriteValueARM64_OpARM64ADDconst_0(v *Value) bool {
	// match: (ADDconst [off1] (MOVDaddr [off2] {sym} ptr))
	// cond:
	// result: (MOVDaddr [off1+off2] {sym} ptr)
	for {
		off1 := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym := v_0.Aux
		ptr := v_0.Args[0]
		v.reset(OpARM64MOVDaddr)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		return true
	}
	// match: (ADDconst [0] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != 0 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (ADDconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [c+d])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c + d
		return true
	}
	// match: (ADDconst [c] (ADDconst [d] x))
	// cond:
	// result: (ADDconst [c+d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c + d
		v.AddArg(x)
		return true
	}
	// match: (ADDconst [c] (SUBconst [d] x))
	// cond:
	// result: (ADDconst [c-d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SUBconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c - d
		v.AddArg(x)
		return true
	}
	return false
}
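// rewriteValueARM64_OpARM64ADDshiftLL_0 simplifies ADDshiftLL with a constant
// operand and recognizes rotate idioms: a left shift by c added to the
// complementary right shift by 64-c of the same value (or by 32-c of its
// zero-extended low word) becomes RORconst or RORWconst.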
func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ADDshiftLL (MOVDconst [c]) x [d])
	// cond:
	// result: (ADDconst [c] (SLLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ADDshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (ADDconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ADDconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ADDshiftLL [c] (SRLconst x [64-c]) x)
	// cond:
	// result: (RORconst [64-c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = 64 - c
		v.AddArg(x)
		return true
	}
	// match: (ADDshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [32-c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		v_0_0 := v_0.Args[0]
		if v_0_0.Op != OpARM64MOVWUreg {
			break
		}
		x := v_0_0.Args[0]
		if x != v.Args[1] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = 32 - c
		v.AddArg(x)
		return true
	}
	return false
}
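// rewriteValueARM64_OpARM64ADDshiftRA_0 folds constant operands of
// ADDshiftRA: a constant left operand becomes ADDconst of an explicit
// SRAconst, and a constant right operand is arithmetic-shifted at compile
// time into the ADDconst immediate.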
func rewriteValueARM64_OpARM64ADDshiftRA_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ADDshiftRA (MOVDconst [c]) x [d])
	// cond:
	// result: (ADDconst [c] (SRAconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ADDshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (ADDconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ADDconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	return false
}
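// rewriteValueARM64_OpARM64ADDshiftRL_0 mirrors the ADDshiftLL rules for
// logical right shifts, including the rotate idioms
// (ADDshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [c] x) and the 32-bit
// RORWconst variant guarded by c < 32 && t.Size() == 4.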
func rewriteValueARM64_OpARM64ADDshiftRL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ADDshiftRL (MOVDconst [c]) x [d])
	// cond:
	// result: (ADDconst [c] (SRLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ADDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ADDshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (ADDconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ADDconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ADDshiftRL [c] (SLLconst x [64-c]) x)
	// cond:
	// result: (RORconst [ c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [ c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		x := v_0.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		if x != v_1.Args[0] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	return false
}
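// rewriteValueARM64_OpARM64AND_0 rewrites AND values: a constant operand is
// folded into ANDconst, (AND x x) reduces to x, and (AND x (MVN y)) becomes
// the bit-clear op (BIC x y); shift-by-constant operands are merged into the
// fused ANDshift* ops.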
func rewriteValueARM64_OpARM64AND_0(v *Value) bool {
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (AND x (MOVDconst [c]))
|
2016-07-21 12:42:49 -04:00
|
|
|
// cond:
|
2016-08-03 09:56:36 -04:00
|
|
|
// result: (ANDconst [c] x)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2017-03-30 03:30:22 +00:00
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ANDconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (AND (MOVDconst [c]) x)
	// cond:
	// result: (ANDconst [c] x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ANDconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (AND x x)
	// cond:
	// result: x
	for {
		x := v.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (AND x (MVN y))
	// cond:
	// result: (BIC x y)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MVN {
			break
		}
		y := v_1.Args[0]
		v.reset(OpARM64BIC)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND (MVN y) x)
	// cond:
	// result: (BIC x y)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MVN {
			break
		}
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64BIC)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND x (SLLconst [c] y))
	// cond:
	// result: (ANDshiftLL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ANDshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND (SLLconst [c] y) x)
	// cond:
	// result: (ANDshiftLL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ANDshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND x (SRLconst [c] y))
	// cond:
	// result: (ANDshiftRL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ANDshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND (SRLconst [c] y) x)
	// cond:
	// result: (ANDshiftRL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ANDshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (AND x (SRAconst [c] y))
	// cond:
	// result: (ANDshiftRA x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64ANDshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
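// rewriteValueARM64_OpARM64AND_10 continues the AND rules started in
// rewriteValueARM64_OpARM64AND_0, handling the remaining commuted form:
// (AND (SRAconst [c] y) x) is rewritten to (ANDshiftRA x y [c]).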
func rewriteValueARM64_OpARM64AND_10(v *Value) bool {
	// match: (AND (SRAconst [c] y) x)
	// cond:
	// result: (ANDshiftRA x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRAconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64ANDshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
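// rewriteValueARM64_OpARM64ANDconst_0 folds constant masks: ANDconst [0]
// becomes (MOVDconst [0]), ANDconst [-1] becomes its operand x, and an
// ANDconst of a MOVDconst or of a nested ANDconst collapses into a single
// constant mask c&d.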
func rewriteValueARM64_OpARM64ANDconst_0(v *Value) bool {
	// match: (ANDconst [0] _)
	// cond:
	// result: (MOVDconst [0])
	for {
		if v.AuxInt != 0 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (ANDconst [-1] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != -1 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (ANDconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [c&d])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c & d
		return true
	}
	// match: (ANDconst [c] (ANDconst [d] x))
	// cond:
	// result: (ANDconst [c&d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ANDconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ANDconst)
		v.AuxInt = c & d
		v.AddArg(x)
		return true
	}
	return false
}
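// rewriteValueARM64_OpARM64ANDshiftLL_0 simplifies AND-with-left-shift: a
// constant unshifted operand becomes an ANDconst of an explicit SLLconst, a
// constant shifted operand is pre-shifted into the ANDconst mask, and
// (ANDshiftLL x y:(SLLconst x [c]) [d]) with c==d reduces to y.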
func rewriteValueARM64_OpARM64ANDshiftLL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ANDshiftLL (MOVDconst [c]) x [d])
	// cond:
	// result: (ANDconst [c] (SLLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ANDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ANDshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (ANDconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ANDconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ANDshiftLL x y:(SLLconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SLLconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	return false
}
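// rewriteValueARM64_OpARM64ANDshiftRA_0 applies the same simplifications for
// the arithmetic-right-shift form, using int64(c)>>uint64(d) when folding a
// constant shifted operand into the ANDconst mask.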
func rewriteValueARM64_OpARM64ANDshiftRA_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ANDshiftRA (MOVDconst [c]) x [d])
	// cond:
	// result: (ANDconst [c] (SRAconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ANDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ANDshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (ANDconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ANDconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ANDshiftRA x y:(SRAconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SRAconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	return false
}
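// rewriteValueARM64_OpARM64ANDshiftRL_0 applies the same simplifications for
// the logical-right-shift form, using uint64(c)>>uint64(d) when folding a
// constant shifted operand into the ANDconst mask.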
func rewriteValueARM64_OpARM64ANDshiftRL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ANDshiftRL (MOVDconst [c]) x [d])
	// cond:
	// result: (ANDconst [c] (SRLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ANDconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ANDshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (ANDconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ANDconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ANDshiftRL x y:(SRLconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SRLconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	return false
}
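// rewriteValueARM64_OpARM64BIC_0 simplifies BIC (bit clear, x &^ y): a
// constant second operand becomes BICconst, (BIC x x) is always zero, and a
// shifted second operand is merged into the corresponding BICshift* op.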
func rewriteValueARM64_OpARM64BIC_0(v *Value) bool {
	// match: (BIC x (MOVDconst [c]))
	// cond:
	// result: (BICconst [c] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64BICconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (BIC x x)
	// cond:
	// result: (MOVDconst [0])
	for {
		x := v.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (BIC x (SLLconst [c] y))
	// cond:
	// result: (BICshiftLL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64BICshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (BIC x (SRLconst [c] y))
	// cond:
	// result: (BICshiftRL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64BICshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (BIC x (SRAconst [c] y))
	// cond:
	// result: (BICshiftRA x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64BICshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
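// rewriteValueARM64_OpARM64BICconst_0 folds constant BIC operands: clearing
// with [0] yields x unchanged, clearing with [-1] yields (MOVDconst [0]),
// and a constant input is evaluated directly to (MOVDconst [d&^c]).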
func rewriteValueARM64_OpARM64BICconst_0(v *Value) bool {
	// match: (BICconst [0] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != 0 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (BICconst [-1] _)
	// cond:
	// result: (MOVDconst [0])
	for {
		if v.AuxInt != -1 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (BICconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [d&^c])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = d &^ c
		return true
	}
	return false
}
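// rewriteValueARM64_OpARM64BICshiftLL_0 folds a constant shifted operand into
// BICconst, and reduces (BICshiftLL x (SLLconst x [c]) [d]) with c==d to
// (MOVDconst [0]).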
func rewriteValueARM64_OpARM64BICshiftLL_0(v *Value) bool {
	// match: (BICshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (BICconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64BICconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (BICshiftLL x (SLLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
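// rewriteValueARM64_OpARM64BICshiftRA_0 is the arithmetic-right-shift
// counterpart: a constant shifted operand folds into BICconst via
// int64(c)>>uint64(d), and the matching self-operand form yields zero.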
func rewriteValueARM64_OpARM64BICshiftRA_0(v *Value) bool {
	// match: (BICshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (BICconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64BICconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (BICshiftRA x (SRAconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
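// rewriteValueARM64_OpARM64BICshiftRL_0 is the logical-right-shift
// counterpart: a constant shifted operand folds into BICconst via
// uint64(c)>>uint64(d), and the matching self-operand form yields zero.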
func rewriteValueARM64_OpARM64BICshiftRL_0(v *Value) bool {
	// match: (BICshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (BICconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64BICconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (BICshiftRL x (SRLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
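// rewriteValueARM64_OpARM64CMP_0 canonicalizes comparisons: a constant or
// shifted right-hand operand folds directly into CMPconst or CMPshift*,
// while a constant or shifted left-hand operand folds the same way but is
// wrapped in InvertFlags to account for the swapped operand order.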
func rewriteValueARM64_OpARM64CMP_0(v *Value) bool {
|
2016-08-10 13:24:03 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
|
|
|
|
// match: (CMP x (MOVDconst [c]))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CMPconst [c] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
v.reset(OpARM64CMPconst)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP (MOVDconst [c]) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (InvertFlags (CMPconst [c] x))
|
|
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_0.AuxInt
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64InvertFlags)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-10 13:24:03 -04:00
|
|
|
v0.AuxInt = c
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP x (SLLconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CMPshiftLL x y [c])
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64SLLconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64CMPshiftLL)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP (SLLconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (InvertFlags (CMPshiftLL x y [c]))
|
|
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64SLLconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_0.AuxInt
|
|
|
|
|
y := v_0.Args[0]
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64InvertFlags)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64CMPshiftLL, TypeFlags)
|
2016-08-10 13:24:03 -04:00
|
|
|
v0.AuxInt = c
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v0.AddArg(y)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP x (SRLconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CMPshiftRL x y [c])
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64SRLconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64CMPshiftRL)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP (SRLconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (InvertFlags (CMPshiftRL x y [c]))
|
|
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64SRLconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_0.AuxInt
|
|
|
|
|
y := v_0.Args[0]
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64InvertFlags)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRL, TypeFlags)
|
2016-08-10 13:24:03 -04:00
|
|
|
v0.AuxInt = c
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v0.AddArg(y)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP x (SRAconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CMPshiftRA x y [c])
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64SRAconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64CMPshiftRA)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (CMP (SRAconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (InvertFlags (CMPshiftRA x y [c]))
|
|
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64SRAconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_0.AuxInt
|
|
|
|
|
y := v_0.Args[0]
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64InvertFlags)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRA, TypeFlags)
|
2016-08-10 13:24:03 -04:00
|
|
|
v0.AuxInt = c
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v0.AddArg(y)
|
|
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
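// Editorial note (not generated code): the CMP rules above canonicalize a
// constant operand into CMPconst and fold constant shifts into the
// CMPshift* forms. When the constant or shift appears as the first operand,
// the operands must be swapped to reach the immediate form, so the result is
// wrapped in InvertFlags to keep the flags meaning the original comparison;
// e.g. (CMP (MOVDconst [7]) x) becomes (InvertFlags (CMPconst [7] x)).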
func rewriteValueARM64_OpARM64CMPW_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (CMPW x (MOVDconst [c]))
	// cond:
	// result: (CMPWconst [int64(int32(c))] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64CMPWconst)
		v.AuxInt = int64(int32(c))
		v.AddArg(x)
		return true
	}
	// match: (CMPW (MOVDconst [c]) x)
	// cond:
	// result: (InvertFlags (CMPWconst [int64(int32(c))] x))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64InvertFlags)
		v0 := b.NewValue0(v.Pos, OpARM64CMPWconst, TypeFlags)
		v0.AuxInt = int64(int32(c))
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	return false
}
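// Editorial note (not generated code): CMPW is a 32-bit compare, so the
// rules above narrow the constant with int64(int32(c)) before storing it as
// the AuxInt. For an assumed c = 0x100000001, int64(int32(c)) == 1, so
// (CMPW x (MOVDconst [0x100000001])) rewrites to (CMPWconst [1] x).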
func rewriteValueARM64_OpARM64CMPWconst_0(v *Value) bool {
	// match: (CMPWconst (MOVDconst [x]) [y])
	// cond: int32(x)==int32(y)
	// result: (FlagEQ)
	for {
		y := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		x := v_0.AuxInt
		if !(int32(x) == int32(y)) {
			break
		}
		v.reset(OpARM64FlagEQ)
		return true
	}
	// match: (CMPWconst (MOVDconst [x]) [y])
	// cond: int32(x)<int32(y) && uint32(x)<uint32(y)
	// result: (FlagLT_ULT)
	for {
		y := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		x := v_0.AuxInt
		if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	// match: (CMPWconst (MOVDconst [x]) [y])
	// cond: int32(x)<int32(y) && uint32(x)>uint32(y)
	// result: (FlagLT_UGT)
	for {
		y := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		x := v_0.AuxInt
		if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
			break
		}
		v.reset(OpARM64FlagLT_UGT)
		return true
	}
	// match: (CMPWconst (MOVDconst [x]) [y])
	// cond: int32(x)>int32(y) && uint32(x)<uint32(y)
	// result: (FlagGT_ULT)
	for {
		y := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		x := v_0.AuxInt
		if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
			break
		}
		v.reset(OpARM64FlagGT_ULT)
		return true
	}
	// match: (CMPWconst (MOVDconst [x]) [y])
	// cond: int32(x)>int32(y) && uint32(x)>uint32(y)
	// result: (FlagGT_UGT)
	for {
		y := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		x := v_0.AuxInt
		if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
			break
		}
		v.reset(OpARM64FlagGT_UGT)
		return true
	}
	// match: (CMPWconst (MOVBUreg _) [c])
	// cond: 0xff < int32(c)
	// result: (FlagLT_ULT)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVBUreg {
			break
		}
		if !(0xff < int32(c)) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
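	// Editorial note (not generated code): a MOVBUreg value is a
	// zero-extended byte, so it is at most 0xff. When 0xff < int32(c), the
	// compared value is therefore always less than the constant, both as a
	// signed and as an unsigned 32-bit number, which is exactly what
	// FlagLT_ULT encodes. The MOVHUreg rule below applies the same reasoning
	// with the halfword bound 0xffff.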
	// match: (CMPWconst (MOVHUreg _) [c])
	// cond: 0xffff < int32(c)
	// result: (FlagLT_ULT)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVHUreg {
			break
		}
		if !(0xffff < int32(c)) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	return false
}
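// Editorial note (not generated code): with both operands constant, the
// CMPWconst rules above evaluate the comparison at compile time and replace
// it with a flags constant. For assumed x = -1 and y = 1: int32(-1) is less
// than int32(1), while uint32(x) == 0xffffffff is greater than uint32(1), so
// (CMPWconst (MOVDconst [-1]) [1]) rewrites to (FlagLT_UGT).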
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpARM64CMPconst_0(v *Value) bool {
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (CMPconst (MOVDconst [x]) [y])
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: x==y
|
|
|
|
|
// result: (FlagEQ)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
y := v.AuxInt
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v_0 := v.Args[0]
|
2016-08-03 09:56:36 -04:00
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
x := v_0.AuxInt
|
|
|
|
|
if !(x == y) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpARM64FlagEQ)
|
2016-07-21 12:42:49 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (CMPconst (MOVDconst [x]) [y])
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: int64(x)<int64(y) && uint64(x)<uint64(y)
|
|
|
|
|
// result: (FlagLT_ULT)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
y := v.AuxInt
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v_0 := v.Args[0]
|
2016-08-03 09:56:36 -04:00
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
x := v_0.AuxInt
|
|
|
|
|
if !(int64(x) < int64(y) && uint64(x) < uint64(y)) {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
v.reset(OpARM64FlagLT_ULT)
|
2016-07-21 12:42:49 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (CMPconst (MOVDconst [x]) [y])
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: int64(x)<int64(y) && uint64(x)>uint64(y)
|
|
|
|
|
// result: (FlagLT_UGT)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
y := v.AuxInt
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v_0 := v.Args[0]
|
2016-08-03 09:56:36 -04:00
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
x := v_0.AuxInt
|
|
|
|
|
if !(int64(x) < int64(y) && uint64(x) > uint64(y)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpARM64FlagLT_UGT)
|
2016-07-21 12:42:49 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (CMPconst (MOVDconst [x]) [y])
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: int64(x)>int64(y) && uint64(x)<uint64(y)
|
|
|
|
|
// result: (FlagGT_ULT)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
y := v.AuxInt
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v_0 := v.Args[0]
|
2016-08-03 09:56:36 -04:00
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
x := v_0.AuxInt
|
|
|
|
|
if !(int64(x) > int64(y) && uint64(x) < uint64(y)) {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
v.reset(OpARM64FlagGT_ULT)
|
2016-07-21 12:42:49 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (CMPconst (MOVDconst [x]) [y])
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: int64(x)>int64(y) && uint64(x)>uint64(y)
|
|
|
|
|
// result: (FlagGT_UGT)
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
y := v.AuxInt
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v_0 := v.Args[0]
|
2016-08-03 09:56:36 -04:00
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
			break
		}
		x := v_0.AuxInt
		if !(int64(x) > int64(y) && uint64(x) > uint64(y)) {
			break
		}
		v.reset(OpARM64FlagGT_UGT)
		return true
	}
	// match: (CMPconst (MOVBUreg _) [c])
	// cond: 0xff < c
	// result: (FlagLT_ULT)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVBUreg {
			break
		}
		if !(0xff < c) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	// match: (CMPconst (MOVHUreg _) [c])
	// cond: 0xffff < c
	// result: (FlagLT_ULT)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVHUreg {
			break
		}
		if !(0xffff < c) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	// match: (CMPconst (MOVWUreg _) [c])
	// cond: 0xffffffff < c
	// result: (FlagLT_ULT)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVWUreg {
			break
		}
		if !(0xffffffff < c) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	// match: (CMPconst (ANDconst _ [m]) [n])
	// cond: 0 <= m && m < n
	// result: (FlagLT_ULT)
	for {
		n := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ANDconst {
			break
		}
		m := v_0.AuxInt
		if !(0 <= m && m < n) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	// match: (CMPconst (SRLconst _ [c]) [n])
	// cond: 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)
	// result: (FlagLT_ULT)
	for {
		n := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		c := v_0.AuxInt
		if !(0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)) {
			break
		}
		v.reset(OpARM64FlagLT_ULT)
		return true
	}
	return false
}
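// rewriteValueARM64_OpARM64CMPshiftLL_0 simplifies comparisons against a
// left-shifted operand. A constant first operand is rewritten through
// InvertFlags; a constant second operand is folded into the comparison,
// e.g. (CMPshiftLL x (MOVDconst [3]) [2]) becomes (CMPconst x [12]),
// since int64(uint64(3)<<uint64(2)) == 12.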
func rewriteValueARM64_OpARM64CMPshiftLL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (CMPshiftLL (MOVDconst [c]) x [d])
	// cond:
	// result: (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64InvertFlags)
		v0 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v0.AuxInt = c
		v1 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
		v1.AuxInt = d
		v1.AddArg(x)
		v0.AddArg(v1)
		v.AddArg(v0)
		return true
	}
	// match: (CMPshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (CMPconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64CMPconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	return false
}
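// rewriteValueARM64_OpARM64CMPshiftRA_0 does the same for arithmetic right
// shifts; the constant is shifted as a signed value, e.g.
// (CMPshiftRA x (MOVDconst [-8]) [1]) becomes (CMPconst x [-4]).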
func rewriteValueARM64_OpARM64CMPshiftRA_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (CMPshiftRA (MOVDconst [c]) x [d])
	// cond:
	// result: (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64InvertFlags)
		v0 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v0.AuxInt = c
		v1 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
		v1.AuxInt = d
		v1.AddArg(x)
		v0.AddArg(v1)
		v.AddArg(v0)
		return true
	}
	// match: (CMPshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (CMPconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64CMPconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	return false
}
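// rewriteValueARM64_OpARM64CMPshiftRL_0 covers logical right shifts; the
// constant is shifted as an unsigned value, e.g.
// (CMPshiftRL x (MOVDconst [16]) [3]) becomes (CMPconst x [2]).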
func rewriteValueARM64_OpARM64CMPshiftRL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (CMPshiftRL (MOVDconst [c]) x [d])
	// cond:
	// result: (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64InvertFlags)
		v0 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v0.AuxInt = c
		v1 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
		v1.AuxInt = d
		v1.AddArg(x)
		v0.AddArg(v1)
		v.AddArg(v0)
		return true
	}
	// match: (CMPshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (CMPconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64CMPconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	return false
}
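// rewriteValueARM64_OpARM64CSELULT_0 simplifies unsigned-less-than
// conditional selects: a constant-zero second operand converts the op to
// CSELULT0, and a statically known flag argument collapses the select to a
// single operand (x when the flags encode ULT, y otherwise).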
func rewriteValueARM64_OpARM64CSELULT_0(v *Value) bool {
	// match: (CSELULT x (MOVDconst [0]) flag)
	// cond:
	// result: (CSELULT0 x flag)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		flag := v.Args[2]
		v.reset(OpARM64CSELULT0)
		v.AddArg(x)
		v.AddArg(flag)
		return true
	}
	// match: (CSELULT _ y (FlagEQ))
	// cond:
	// result: y
	for {
		y := v.Args[1]
		v_2 := v.Args[2]
		if v_2.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	// match: (CSELULT x _ (FlagLT_ULT))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_2 := v.Args[2]
		if v_2.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (CSELULT _ y (FlagLT_UGT))
	// cond:
	// result: y
	for {
		y := v.Args[1]
		v_2 := v.Args[2]
		if v_2.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	// match: (CSELULT x _ (FlagGT_ULT))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_2 := v.Args[2]
		if v_2.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (CSELULT _ y (FlagGT_UGT))
	// cond:
	// result: y
	for {
		y := v.Args[1]
		v_2 := v.Args[2]
		if v_2.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	return false
}
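// rewriteValueARM64_OpARM64CSELULT0_0 is the zero-operand variant: a known
// flag argument reduces the value to x when the flags encode ULT, and to
// (MOVDconst [0]) otherwise.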
func rewriteValueARM64_OpARM64CSELULT0_0(v *Value) bool {
	// match: (CSELULT0 _ (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (CSELULT0 x (FlagLT_ULT))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (CSELULT0 _ (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (CSELULT0 x (FlagGT_ULT))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (CSELULT0 _ (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
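// rewriteValueARM64_OpARM64DIV_0 folds signed 64-bit division of two
// constants at compile time. Go division truncates toward zero, so e.g.
// dividing 7 by -2 yields -3.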
func rewriteValueARM64_OpARM64DIV_0(v *Value) bool {
	// match: (DIV (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(c)/int64(d)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(c) / int64(d)
		return true
	}
	return false
}
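// rewriteValueARM64_OpARM64DIVW_0 folds 32-bit signed division of two
// constants: only the low 32 bits participate, and the quotient is
// sign-extended back to 64 bits via int64(int32(c) / int32(d)).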
func rewriteValueARM64_OpARM64DIVW_0(v *Value) bool {
	// match: (DIVW (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(int32(c)/int32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int32(c) / int32(d))
		return true
	}
	return false
}
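// rewriteValueARM64_OpARM64Equal_0 reduces Equal of a known flag value to
// (MOVDconst [1]) or (MOVDconst [0]), and strips InvertFlags, since
// swapping the comparison operands does not affect equality.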
func rewriteValueARM64_OpARM64Equal_0(v *Value) bool {
	// match: (Equal (FlagEQ))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (Equal (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Equal (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Equal (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Equal (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Equal (InvertFlags x))
	// cond:
	// result: (Equal x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64Equal)
		v.AddArg(x)
		return true
	}
	return false
}
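// rewriteValueARM64_OpARM64FMOVDload_0 folds address arithmetic into the
// load's offset: an ADDconst base, or a MOVDaddr base with a mergeable
// symbol, is absorbed when the combined offset fits in 32 bits. SB-relative
// accesses are left alone in shared-library mode (config.ctxt.Flag_shared),
// where globals are instead accessed through the GOT.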
func rewriteValueARM64_OpARM64FMOVDload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVDload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVDload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVDload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
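// rewriteValueARM64_OpARM64FMOVDstore_0 applies the same offset folding to
// floating-point stores.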
func rewriteValueARM64_OpARM64FMOVDstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVDstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVDstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
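	// Folding is safe even when off1+off2 does not fit a load/store
	// immediate: the ARM64 assembler materializes large or unaligned offsets
	// with an extra instruction (or the constant pool), so the only
	// compiler-side requirement is that the combined offset stays within
	// 32 bits, as checked by is32Bit above.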
	// match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVDstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	return false
}
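// rewriteValueARM64_OpARM64FMOVSload_0 applies the same two address-folding
// rules to single-precision loads. The (ptr.Op != OpSB ||
// !config.ctxt.Flag_shared) guard disables folding for globals in
// shared-library mode, where the access is rewritten to go through the GOT
// using the temp register, which would otherwise be needed to assemble a
// large folded offset.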
func rewriteValueARM64_OpARM64FMOVSload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVSload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVSload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
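	// A sketch with assumed offsets for the MOVDaddr rule below:
	// (FMOVSload [8] {sym1} (MOVDaddr [16] {sym2} ptr) mem) would rewrite to
	// (FMOVSload [24] {mergeSym(sym1,sym2)} ptr mem) once canMergeSym and
	// the 32-bit range check hold.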
	// match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVSload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
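// rewriteValueARM64_OpARM64FMOVSstore_0 mirrors the FMOVDstore rules above
// for single-precision stores; only the op being re-emitted differs.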
func rewriteValueARM64_OpARM64FMOVSstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVSstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVSstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
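	// As elsewhere in this file, each rule compiles to a one-shot for/break
	// loop: mismatches break out to try the next rule, while a successful
	// match resets v in place and returns true so the rewrite pass knows
	// something changed.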
|
|
|
|
|
// match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
|
cmd/internal/obj/arm64, cmd/compile: improve offset folding on ARM64
ARM64 assembler backend only accepts loads and stores with small
or aligned offset. The compiler therefore can only fold small or
aligned offsets into loads and stores. For locals and args, their
offsets to SP are not known until very late, and the compiler
makes conservative decision not folding some of them. However,
in most cases, the offset is indeed small or aligned, and can
be folded into load and store (but actually not).
This CL adds support of loads and stores with large and unaligned
offsets. When the offset doesn't fit into the instruction, it
uses two instructions and (for very large offset) the constant
pool. This way, the compiler doesn't need to be conservative,
and can simply fold the offset.
To make it work, the assembler's optab matching rules need to be
changed. Before, MOVD accepts C_UAUTO32K which matches multiple
of 8 between 0 and 32K, and also C_UAUTO16K, which may not be
multiple of 8 and does not fit into MOVD instruction. The
assembler errors in the latter case. This change makes it only
matches multiple of 8 (or offsets within ±256, which also fits
in instruction), and uses the large-or-unaligned-offset rule
for things doesn't fit (without error). Other sized move rules
are changed similarly.
Class C_UAUTO64K and C_UOREG64K are removed, as they are never
used.
In shared library, load/store of global is rewritten to using
GOT and temp register, which conflicts with the use of temp
register for assembling large offset. So the folding is disabled
for globals in shared library mode.
Reduce cmd/go binary size by 2%.
name old time/op new time/op delta
BinaryTree17-8 8.67s ± 0% 8.61s ± 0% -0.60% (p=0.000 n=9+10)
Fannkuch11-8 6.24s ± 0% 6.19s ± 0% -0.83% (p=0.000 n=10+9)
FmtFprintfEmpty-8 116ns ± 0% 116ns ± 0% ~ (all equal)
FmtFprintfString-8 196ns ± 0% 192ns ± 0% -1.89% (p=0.000 n=10+10)
FmtFprintfInt-8 199ns ± 0% 198ns ± 0% -0.35% (p=0.001 n=9+10)
FmtFprintfIntInt-8 294ns ± 0% 293ns ± 0% -0.34% (p=0.000 n=8+8)
FmtFprintfPrefixedInt-8 318ns ± 1% 318ns ± 1% ~ (p=1.000 n=10+10)
FmtFprintfFloat-8 537ns ± 0% 531ns ± 0% -1.17% (p=0.000 n=9+10)
FmtManyArgs-8 1.19µs ± 1% 1.18µs ± 1% -1.41% (p=0.001 n=10+10)
GobDecode-8 17.2ms ± 1% 17.3ms ± 2% ~ (p=0.165 n=10+10)
GobEncode-8 14.7ms ± 1% 14.7ms ± 2% ~ (p=0.631 n=10+10)
Gzip-8 837ms ± 0% 836ms ± 0% -0.14% (p=0.006 n=9+10)
Gunzip-8 141ms ± 0% 139ms ± 0% -1.24% (p=0.000 n=9+10)
HTTPClientServer-8 256µs ± 1% 253µs ± 1% -1.35% (p=0.000 n=10+10)
JSONEncode-8 40.1ms ± 1% 41.3ms ± 1% +3.06% (p=0.000 n=10+9)
JSONDecode-8 157ms ± 1% 156ms ± 1% -0.83% (p=0.001 n=9+8)
Mandelbrot200-8 8.94ms ± 0% 8.94ms ± 0% +0.02% (p=0.000 n=9+9)
GoParse-8 8.69ms ± 0% 8.54ms ± 1% -1.69% (p=0.000 n=8+10)
RegexpMatchEasy0_32-8 227ns ± 1% 228ns ± 1% +0.48% (p=0.016 n=10+9)
RegexpMatchEasy0_1K-8 1.92µs ± 0% 1.63µs ± 0% -15.08% (p=0.000 n=10+9)
RegexpMatchEasy1_32-8 256ns ± 0% 251ns ± 0% -2.19% (p=0.000 n=10+9)
RegexpMatchEasy1_1K-8 2.38µs ± 0% 2.09µs ± 0% -12.49% (p=0.000 n=10+9)
RegexpMatchMedium_32-8 352ns ± 0% 354ns ± 0% +0.39% (p=0.002 n=10+9)
RegexpMatchMedium_1K-8 106µs ± 0% 106µs ± 0% -0.05% (p=0.005 n=10+9)
RegexpMatchHard_32-8 5.92µs ± 0% 5.89µs ± 0% -0.40% (p=0.000 n=9+8)
RegexpMatchHard_1K-8 180µs ± 0% 179µs ± 0% -0.14% (p=0.000 n=10+9)
Revcomp-8 1.20s ± 0% 1.13s ± 0% -6.29% (p=0.000 n=9+8)
Template-8 159ms ± 1% 154ms ± 1% -3.14% (p=0.000 n=9+10)
TimeParse-8 800ns ± 3% 769ns ± 1% -3.91% (p=0.000 n=10+10)
TimeFormat-8 826ns ± 2% 817ns ± 2% -1.04% (p=0.050 n=10+10)
[Geo mean] 145µs 143µs -1.79%
Change-Id: I5fc42087cee9b54ea414f8ef6d6d020b80eb5985
Reviewed-on: https://go-review.googlesource.com/42172
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
2017-04-28 18:02:00 -04:00
|
|
|
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64FMOVSstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	return false
}
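// rewriteValueARM64_OpARM64GreaterEqual_0 folds a signed >= test of an
// already-known flags value to a constant: FlagEQ, FlagGT_ULT and FlagGT_UGT
// (equal or signed-greater) become 1, the FlagLT_* values become 0. It also
// rewrites (GreaterEqual (InvertFlags x)) to (LessEqual x), since InvertFlags
// stands for the same comparison with its operands exchanged.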
func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
	// match: (GreaterEqual (FlagEQ))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqual (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterEqual (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterEqual (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqual (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqual (InvertFlags x))
	// cond:
	// result: (LessEqual x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64LessEqual)
		v.AddArg(x)
		return true
	}
	return false
}
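// rewriteValueARM64_OpARM64GreaterEqualU_0 is the unsigned counterpart: the
// flag constants whose unsigned component is equal-or-greater (FlagEQ,
// FlagLT_UGT, FlagGT_UGT) fold to 1, the *_ULT values fold to 0, and
// (GreaterEqualU (InvertFlags x)) becomes (LessEqualU x).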
func rewriteValueARM64_OpARM64GreaterEqualU_0(v *Value) bool {
	// match: (GreaterEqualU (FlagEQ))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqualU (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterEqualU (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqualU (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterEqualU (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterEqualU (InvertFlags x))
	// cond:
	// result: (LessEqualU x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64LessEqualU)
		v.AddArg(x)
		return true
	}
	return false
}
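// rewriteValueARM64_OpARM64GreaterThan_0 folds a signed > test of a known
// flags value: only FlagGT_ULT and FlagGT_UGT yield 1; FlagEQ and the
// FlagLT_* values yield 0. (GreaterThan (InvertFlags x)) mirrors to
// (LessThan x).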
func rewriteValueARM64_OpARM64GreaterThan_0(v *Value) bool {
	// match: (GreaterThan (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThan (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThan (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThan (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterThan (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterThan (InvertFlags x))
	// cond:
	// result: (LessThan x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64LessThan)
		v.AddArg(x)
		return true
	}
	return false
}
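// rewriteValueARM64_OpARM64GreaterThanU_0 folds an unsigned > test: the
// *_UGT flag constants yield 1, FlagEQ and the *_ULT constants yield 0,
// and (GreaterThanU (InvertFlags x)) mirrors to (LessThanU x).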
func rewriteValueARM64_OpARM64GreaterThanU_0(v *Value) bool {
	// match: (GreaterThanU (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThanU (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThanU (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterThanU (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (GreaterThanU (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (GreaterThanU (InvertFlags x))
	// cond:
	// result: (LessThanU x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64LessThanU)
		v.AddArg(x)
		return true
	}
	return false
}
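// rewriteValueARM64_OpARM64LessEqual_0 folds a signed <= test: FlagEQ and
// the FlagLT_* constants yield 1, the FlagGT_* constants yield 0, and
// (LessEqual (InvertFlags x)) mirrors to (GreaterEqual x).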
func rewriteValueARM64_OpARM64LessEqual_0(v *Value) bool {
	// match: (LessEqual (FlagEQ))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqual (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqual (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqual (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessEqual (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessEqual (InvertFlags x))
	// cond:
	// result: (GreaterEqual x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64GreaterEqual)
		v.AddArg(x)
		return true
	}
	return false
}
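// rewriteValueARM64_OpARM64LessEqualU_0 folds an unsigned <= test: FlagEQ
// and the *_ULT flag constants yield 1, the *_UGT constants yield 0, and
// (LessEqualU (InvertFlags x)) mirrors to (GreaterEqualU x).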
func rewriteValueARM64_OpARM64LessEqualU_0(v *Value) bool {
	// match: (LessEqualU (FlagEQ))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqualU (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqualU (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessEqualU (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessEqualU (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessEqualU (InvertFlags x))
	// cond:
	// result: (GreaterEqualU x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64GreaterEqualU)
		v.AddArg(x)
		return true
	}
	return false
}
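// rewriteValueARM64_OpARM64LessThan_0 folds a signed < test: the FlagLT_*
// constants yield 1, FlagEQ and the FlagGT_* constants yield 0, and
// (LessThan (InvertFlags x)) mirrors to (GreaterThan x).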
func rewriteValueARM64_OpARM64LessThan_0(v *Value) bool {
	// match: (LessThan (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThan (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessThan (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessThan (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThan (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThan (InvertFlags x))
	// cond:
	// result: (GreaterThan x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64GreaterThan)
		v.AddArg(x)
		return true
	}
	return false
}
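// rewriteValueARM64_OpARM64LessThanU_0 folds an unsigned < test: the *_ULT
// flag constants yield 1, FlagEQ and the *_UGT constants yield 0, and
// (LessThanU (InvertFlags x)) mirrors to (GreaterThanU x).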
func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
	// match: (LessThanU (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThanU (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessThanU (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThanU (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (LessThanU (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (LessThanU (InvertFlags x))
	// cond:
	// result: (GreaterThanU x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64GreaterThanU)
		v.AddArg(x)
		return true
	}
	return false
}
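// rewriteValueARM64_OpARM64MOD_0 constant-folds 64-bit signed remainder:
// when both operands are MOVDconst, the whole MOD collapses to a single
// MOVDconst computed with Go's truncated %. For example, constants -7 and 3
// fold to MOVDconst [-1], following Go's truncated-division semantics.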
func rewriteValueARM64_OpARM64MOD_0(v *Value) bool {
	// match: (MOD (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(c)%int64(d)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(c) % int64(d)
		return true
	}
	return false
}
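// rewriteValueARM64_OpARM64MODW_0 is the 32-bit variant: both constants are
// truncated to int32 before the remainder is taken, and the int32 result is
// sign-extended back into the 64-bit AuxInt.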
func rewriteValueARM64_OpARM64MODW_0(v *Value) bool {
	// match: (MODW (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(int32(c)%int32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int32(c) % int32(d))
		return true
	}
	return false
}
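// rewriteValueARM64_OpARM64MOVBUload_0 rewrites unsigned-byte loads; its
// first rule folds the constant offset of an ADDconst address computation
// into the load's offset field.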
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBUload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBUload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
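	// Illustrative example (a sketch, not generated output): the rule above
	// folds (MOVBUload [8] {sym} (ADDconst [4] ptr) mem) into
	// (MOVBUload [12] {sym} ptr mem), provided the combined offset fits in
	// 32 bits and ptr is not SB while compiling in shared-library mode.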
	// match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBUload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
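	// Illustrative example (a sketch): with sym1 = nil and a hypothetical
	// global pkg.x, (MOVBUload [1] (MOVDaddr [2] {pkg.x} SB) mem) would become
	// (MOVBUload [3] {pkg.x} SB mem) outside shared-library mode, folding the
	// address computation into the load's symbol and offset.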
	// match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVBstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
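	// Illustrative note: this is store-to-load forwarding for the zero case.
	// A byte load that reads back the exact address a MOVBstorezero just
	// wrote can only observe zero, so it is replaced by (MOVDconst [0]).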
	return false
}
func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
	// match: (MOVBUreg x:(MOVBUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
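	// Illustrative note: MOVBUload already zero-extends the loaded byte to
	// 64 bits, so a following MOVBUreg is redundant and collapses to a plain
	// register-to-register move.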
	// match: (MOVBUreg x:(MOVBUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVBUreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(uint8(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint8(c))
		return true
	}
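	// Illustrative example (a sketch): uint8 keeps only the low 8 bits, so
	// (MOVBUreg (MOVDconst [-1])) folds to (MOVDconst [255]).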
	return false
}
func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
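	// Illustrative example (a sketch): as in the MOVBUload case above,
	// (MOVBload [8] {sym} (ADDconst [4] ptr) mem) folds to
	// (MOVBload [12] {sym} ptr mem); the only difference is that MOVBload
	// sign-extends the loaded byte instead of zero-extending it.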
	// match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVBstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
	// match: (MOVBreg x:(MOVBload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVBreg x:(MOVBreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVBreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(int8(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int8(c))
		return true
	}
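	// Illustrative example (a sketch): int8 reinterprets the low byte as a
	// signed value, so (MOVBreg (MOVDconst [200])) folds to
	// (MOVDconst [-56]), since int8(200) = -56.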
	return false
}
func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
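	// Illustrative example (a sketch): the symmetric rule for stores folds
	// (MOVBstore [8] {sym} (ADDconst [4] ptr) val mem) into
	// (MOVBstore [12] {sym} ptr val mem) under the same 32-bit-offset and
	// shared-library restrictions as the loads above.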
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
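	// The rule above folds a (MOVDaddr [off2] {sym2} ptr) address
	// computation into the store itself, merging the two offsets and
	// symbols under the same 32-bit-offset and shared-library
	// restrictions as the ADDconst rule.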
	// match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
	// cond:
	// result: (MOVBstorezero [off] {sym} ptr mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		mem := v.Args[2]
		v.reset(OpARM64MOVBstorezero)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
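	// Rewriting a store of constant zero to MOVBstorezero lets the
	// back end store from the zero register rather than materializing
	// 0 in a general-purpose register first.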
	// match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVBreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVBUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
	// cond:
	// result: (MOVBstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
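	// The six rules above drop a redundant sign or zero extension
	// (MOVBreg, MOVBUreg, MOVHreg, MOVHUreg, MOVWreg, MOVWUreg) of the
	// stored value: a byte store writes only the low 8 bits, so the
	// extension cannot change what reaches memory.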
	return false
}
func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
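	// b and config are always bound (and blank-assigned in case no rule
	// uses them) so that conditions can consult config.ctxt flags such
	// as Flag_shared.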
	// match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBstorezero [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBstorezero)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVBstorezero)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
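	// The two rules above give MOVBstorezero the same ADDconst and
	// MOVDaddr folding as the MOVBstore rules, so zeroing stores keep
	// folded offsets as well.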
}
func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVDload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
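	// MOVDload folds offsets the same way the store rules do. The
	// assembler accepts large or unaligned offsets by expanding to two
	// instructions, or the constant pool for very large offsets, so the
	// compiler can fold freely except for globals in shared-library
	// mode.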
	// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVDload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
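// The MOVDload rules above fold address arithmetic into the load's offset:
// an ADDconst offset is added to the existing offset, a MOVDaddr base has
// both its offset and its symbol merged in, and a load of memory just
// zeroed by MOVDstorezero is replaced by the constant 0. As a sketch, the
// MOVDaddr rule would look roughly like this in gen/ARM64.rules (assumed
// layout; the generated match/cond/result comments above come straight
// from the rule):
//
//	(MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
//		&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
//		&& (ptr.Op != OpSB || !config.ctxt.Flag_shared)
//		-> (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)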
func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
	// match: (MOVDreg x)
	// cond: x.Uses == 1
	// result: (MOVDnop x)
	for {
		x := v.Args[0]
		if !(x.Uses == 1) {
			break
		}
		v.reset(OpARM64MOVDnop)
		v.AddArg(x)
		return true
	}
	// match: (MOVDreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c
		return true
	}
	return false
}
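// The MOVDreg rules above clean up register-to-register moves: a MOVDreg of
// a value that has no other use becomes a MOVDnop, and a MOVDreg of a
// constant becomes the constant itself. Rules like these pay off because
// the rewriter keeps applying rules until nothing changes. A minimal sketch
// of such a fixed-point driver, assuming only the per-value dispatcher in
// this file (hypothetical helper; the real loop lives elsewhere in the ssa
// package and also rewrites block control values):
//
//	func applyValueRules(f *Func) {
//		for changed := true; changed; {
//			changed = false
//			for _, b := range f.Blocks {
//				for _, v := range b.Values {
//					if rewriteValueARM64(v) { // reports whether a rule fired
//						changed = true
//					}
//				}
//			}
//		}
//	}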
func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVDstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVDstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
	// cond:
	// result: (MOVDstorezero [off] {sym} ptr mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		mem := v.Args[2]
		v.reset(OpARM64MOVDstorezero)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
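// The final MOVDstore rule above canonicalizes a store of the constant 0
// into MOVDstorezero, which the back end can typically emit as a store from
// the hardware zero register instead of first materializing 0 in a register.
// A hypothetical before/after in SSA form:
//
//	v1 = MOVDconst <int64> [0]
//	v2 = MOVDstore <mem> [off] {sym} ptr v1 mem
//	=>
//	v2 = MOVDstorezero <mem> [off] {sym} ptr mem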
func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDstorezero [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVDstorezero)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
|
2016-08-10 13:24:03 -04:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpARM64MOVDstorezero)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = mergeSym(sym1, sym2)
|
|
|
|
|
v.AddArg(ptr)
|
|
|
|
|
v.AddArg(mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
return false
|
2016-07-21 12:42:49 -04:00
|
|
|
}
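// Illustrative sketch (comment only; the values are hypothetical): the
// MOVDstorezero rule above folds address arithmetic into the store offset.
// For example,
//	(MOVDstorezero [8] {sym1} (MOVDaddr [16] {sym2} ptr) mem)
// rewrites to
//	(MOVDstorezero [24] {mergeSym(sym1,sym2)} ptr mem)
// provided canMergeSym(sym1,sym2) holds, 8+16 fits in 32 bits, and ptr is
// not SB while compiling in shared-library mode (Flag_shared).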
func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHUload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHUload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHUload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
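// Illustrative sketch (comment only; the values are hypothetical): the first
// MOVHUload rule above folds a small constant add into the load, e.g.
//	(MOVHUload [2] {sym} (ADDconst [4] ptr) mem)
// becomes (MOVHUload [6] {sym} ptr mem) since is32Bit(2+4) holds. The last
// rule recognizes a load from memory just zeroed by MOVHstorezero at the
// same address and replaces it with (MOVDconst [0]), eliminating the load.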
func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
	// match: (MOVHUreg x:(MOVBUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHUreg x:(MOVHUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHUreg x:(MOVBUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHUreg x:(MOVHUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHUreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(uint16(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint16(c))
		return true
	}
	return false
}
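// Illustrative sketch (comment only; the constant is hypothetical): MOVHUreg
// zero-extends from 16 bits, so it is redundant when its argument is already
// zero-extended (MOVBUload, MOVHUload, MOVBUreg, MOVHUreg) and collapses to
// a plain (MOVDreg x). Constants are folded at compile time:
//	(MOVHUreg (MOVDconst [0x12345])) -> (MOVDconst [0x2345])
// because int64(uint16(0x12345)) == 0x2345.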
func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHload)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
|
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
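	// A halfword load whose memory argument is a MOVHstorezero to the
	// same symbol, offset, and pointer reads back the zero that was just
	// stored, so it can be replaced by a constant zero outright.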
	// match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
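// MOVHreg sign-extends the low 16 bits of its argument to 64 bits.
// Each rule below recognizes an argument that is already correctly
// extended (a narrower extending load such as MOVBload, MOVBUload, or
// MOVHload, or a previous extension op) and reduces the MOVHreg to a
// plain register copy (MOVDreg), which later passes can eliminate.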
func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
	// match: (MOVHreg x:(MOVBload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHreg x:(MOVBUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHreg x:(MOVHload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHreg x:(MOVBreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHreg x:(MOVBUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVHreg x:(MOVHreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
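	// Sign extension of a constant needs no code at all: the extension
	// is folded at compile time by reinterpreting c as int16.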
	// match: (MOVHreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(int16(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int16(c))
		return true
	}
	return false
}
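// The MOVHstore rules fold constant and symbolic address arithmetic
// into the store's offset, rewrite stores of constant zero to
// MOVHstorezero, and drop redundant extensions of the stored value.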
func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
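	// Fold a constant address increment (ADDconst) into the store's
	// offset, provided the combined offset fits in 32 bits. The
	// ptr.Op != OpSB guard disables folding for global addresses in
	// shared-library mode, where globals are accessed through the GOT
	// using the same temp register that assembling a large folded
	// offset would need.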
	// match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
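	// The same folding for a symbolic address (MOVDaddr): offsets are
	// added and the two symbols merged with mergeSym, under the same
	// 32-bit and shared-library guards as above.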
	// match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
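	// A store of constant zero is rewritten to MOVHstorezero, which
	// stores from the zero register and avoids materializing the
	// constant. As an illustration (hypothetical source, not part of
	// this file), an assignment like *p = 0 with p of type *int16
	// would take this path.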
	// match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
	// cond:
	// result: (MOVHstorezero [off] {sym} ptr mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		mem := v.Args[2]
		v.reset(OpARM64MOVHstorezero)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
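	// The next two rules drop a sign or zero extension (MOVHreg,
	// MOVHUreg) of the stored value: a halfword store writes only the
	// low 16 bits, so the extension cannot affect what reaches memory.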
	// match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
	// cond:
	// result: (MOVHstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
v.AddArg(mem)
|
2016-07-22 06:41:14 -04:00
|
|
|
return true
|
|
|
|
|
}
	// match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
	// cond:
	// result: (MOVHstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVHUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
	// cond:
	// result: (MOVHstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
	// cond:
	// result: (MOVHstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHstorezero [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHstorezero)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVHstorezero)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWUload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
|
2016-08-16 14:17:33 -04:00
|
|
|
break
|
|
|
|
|
}
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v.reset(OpARM64MOVWUload)
|
2016-08-03 09:56:36 -04:00
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
[dev.ssa] cmd/compile: refactor out rulegen value parsing
Previously, genMatch0 and genResult0 contained
lots of duplication: locating the op, parsing
the value, validation, etc.
Parsing and validation was mixed in with code gen.
Extract a helper, parseValue. It is responsible
for parsing the value, locating the op, and doing
shared validation.
As a bonus (and possibly as my original motivation),
make op selection pay attention to the number
of args present.
This allows arch-specific ops to share a name
with generic ops as long as there is no ambiguity.
It also detects and reports unresolved ambiguity,
unlike before, where it would simply always
pick the generic op, with no warning.
Also use parseValue when generating the top-level
op dispatch, to ensure its opinion about ops
matches genMatch0 and genResult0.
The order of statements in the generated code used
to depend on the exact rule. It is now somewhat
independent of the rule. That is the source
of some of the generated code changes in this CL.
See rewritedec64 and rewritegeneric for examples.
It is a one-time change.
The op dispatch switch and functions used to be
sorted by opname without architecture. The sort
now includes the architecture, leading to further
generated code changes.
See rewriteARM and rewriteAMD64 for examples.
Again, it is a one-time change.
There are no functional changes.
Change-Id: I22c989183ad5651741ebdc0566349c5fd6c6b23c
Reviewed-on: https://go-review.googlesource.com/24649
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
2016-07-01 11:05:29 -07:00
|
|
|
v.AddArg(ptr)
|
|
|
|
|
v.AddArg(mem)
|
|
|
|
|
return true
|
|
|
|
|
}
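	// The loop above implements offset folding for MOVWUload. Reconstructed
	// from the match/cond/result comments (a sketch, not copied verbatim from
	// gen/ARM64.rules), the source rule has roughly this shape:
	//
	//	(MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
	//		&& is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	//		-> (MOVWUload [off1+off2] {sym} ptr mem)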
	// match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWUload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
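	// Both folds above skip ptr.Op == OpSB when building shared libraries:
	// per the CL that introduced them (https://go-review.googlesource.com/42172),
	// loads and stores of globals in shared mode are rewritten to go through
	// the GOT using the temp register, which conflicts with the temp
	// register's use when assembling large offsets, so the fold is disabled
	// for globals there.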
	// match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
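	// The rule above forwards a known zero store to a later load of the same
	// address. A minimal Go-level sketch of code it could fire on
	// (illustrative only; whether it actually fires depends on the
	// surrounding SSA form):
	//
	//	*p = 0   // becomes MOVWstorezero [0] {s} p mem
	//	x := *p  // MOVWUload of the same ptr/off/sym -> MOVDconst [0]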
	return false
}
func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
	// match: (MOVWUreg x:(MOVBUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
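	// This rule and the five that follow all reduce MOVWUreg (zero-extend the
	// low 32 bits) to a plain MOVDreg when the operand is already
	// zero-extended: on ARM64 the unsigned loads (MOVBUload, MOVHUload,
	// MOVWUload) and the unsigned register extensions (MOVBUreg, MOVHUreg,
	// MOVWUreg) all leave the upper bits clear, so extending again is a no-op.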
	// match: (MOVWUreg x:(MOVHUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWUreg x:(MOVWUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVWUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWUreg x:(MOVBUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWUreg x:(MOVHUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWUreg x:(MOVWUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVWUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWUreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(uint32(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint32(c))
		return true
	}
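	// The constant case masks c to its low 32 bits via int64(uint32(c)).
	// For example (worked by hand, not taken from the source), c = -1
	// (0xFFFFFFFFFFFFFFFF) becomes 0xFFFFFFFF = 4294967295, exactly what a
	// 32-bit zero-extension of the value would produce at run time.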
	return false
}
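// A Go-level sketch of where the MOVWUreg rules above pay off (a
// hypothetical example, not taken from the compiler's tests):
//
//	func f(p *uint32) uint64 {
//		// *p is loaded with MOVWUload, which already zero-extends, so
//		// the uint64 conversion reduces to MOVDreg, which is in turn
//		// typically coalesced away by register allocation.
//		return uint64(*p)
//	}
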
func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
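	// b and config are materialized up front and blank-assigned so the
	// function compiles even if no rule below ends up using them; the
	// generator emits this pair unconditionally (an inference from the
	// pattern across these functions, not a quote from the generator).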
	// match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWload [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
|
cmd/internal/obj/arm64, cmd/compile: improve offset folding on ARM64
ARM64 assembler backend only accepts loads and stores with small
or aligned offset. The compiler therefore can only fold small or
aligned offsets into loads and stores. For locals and args, their
offsets to SP are not known until very late, and the compiler
makes conservative decision not folding some of them. However,
in most cases, the offset is indeed small or aligned, and can
be folded into load and store (but actually not).
This CL adds support of loads and stores with large and unaligned
offsets. When the offset doesn't fit into the instruction, it
uses two instructions and (for very large offset) the constant
pool. This way, the compiler doesn't need to be conservative,
and can simply fold the offset.
To make it work, the assembler's optab matching rules need to be
changed. Before, MOVD accepts C_UAUTO32K which matches multiple
of 8 between 0 and 32K, and also C_UAUTO16K, which may not be
multiple of 8 and does not fit into MOVD instruction. The
assembler errors in the latter case. This change makes it only
matches multiple of 8 (or offsets within ±256, which also fits
in instruction), and uses the large-or-unaligned-offset rule
for things doesn't fit (without error). Other sized move rules
are changed similarly.
Class C_UAUTO64K and C_UOREG64K are removed, as they are never
used.
In shared library, load/store of global is rewritten to using
GOT and temp register, which conflicts with the use of temp
register for assembling large offset. So the folding is disabled
for globals in shared library mode.
Reduce cmd/go binary size by 2%.
name old time/op new time/op delta
BinaryTree17-8 8.67s ± 0% 8.61s ± 0% -0.60% (p=0.000 n=9+10)
Fannkuch11-8 6.24s ± 0% 6.19s ± 0% -0.83% (p=0.000 n=10+9)
FmtFprintfEmpty-8 116ns ± 0% 116ns ± 0% ~ (all equal)
FmtFprintfString-8 196ns ± 0% 192ns ± 0% -1.89% (p=0.000 n=10+10)
FmtFprintfInt-8 199ns ± 0% 198ns ± 0% -0.35% (p=0.001 n=9+10)
FmtFprintfIntInt-8 294ns ± 0% 293ns ± 0% -0.34% (p=0.000 n=8+8)
FmtFprintfPrefixedInt-8 318ns ± 1% 318ns ± 1% ~ (p=1.000 n=10+10)
FmtFprintfFloat-8 537ns ± 0% 531ns ± 0% -1.17% (p=0.000 n=9+10)
FmtManyArgs-8 1.19µs ± 1% 1.18µs ± 1% -1.41% (p=0.001 n=10+10)
GobDecode-8 17.2ms ± 1% 17.3ms ± 2% ~ (p=0.165 n=10+10)
GobEncode-8 14.7ms ± 1% 14.7ms ± 2% ~ (p=0.631 n=10+10)
Gzip-8 837ms ± 0% 836ms ± 0% -0.14% (p=0.006 n=9+10)
Gunzip-8 141ms ± 0% 139ms ± 0% -1.24% (p=0.000 n=9+10)
HTTPClientServer-8 256µs ± 1% 253µs ± 1% -1.35% (p=0.000 n=10+10)
JSONEncode-8 40.1ms ± 1% 41.3ms ± 1% +3.06% (p=0.000 n=10+9)
JSONDecode-8 157ms ± 1% 156ms ± 1% -0.83% (p=0.001 n=9+8)
Mandelbrot200-8 8.94ms ± 0% 8.94ms ± 0% +0.02% (p=0.000 n=9+9)
GoParse-8 8.69ms ± 0% 8.54ms ± 1% -1.69% (p=0.000 n=8+10)
RegexpMatchEasy0_32-8 227ns ± 1% 228ns ± 1% +0.48% (p=0.016 n=10+9)
RegexpMatchEasy0_1K-8 1.92µs ± 0% 1.63µs ± 0% -15.08% (p=0.000 n=10+9)
RegexpMatchEasy1_32-8 256ns ± 0% 251ns ± 0% -2.19% (p=0.000 n=10+9)
RegexpMatchEasy1_1K-8 2.38µs ± 0% 2.09µs ± 0% -12.49% (p=0.000 n=10+9)
RegexpMatchMedium_32-8 352ns ± 0% 354ns ± 0% +0.39% (p=0.002 n=10+9)
RegexpMatchMedium_1K-8 106µs ± 0% 106µs ± 0% -0.05% (p=0.005 n=10+9)
RegexpMatchHard_32-8 5.92µs ± 0% 5.89µs ± 0% -0.40% (p=0.000 n=9+8)
RegexpMatchHard_1K-8 180µs ± 0% 179µs ± 0% -0.14% (p=0.000 n=10+9)
Revcomp-8 1.20s ± 0% 1.13s ± 0% -6.29% (p=0.000 n=9+8)
Template-8 159ms ± 1% 154ms ± 1% -3.14% (p=0.000 n=9+10)
TimeParse-8 800ns ± 3% 769ns ± 1% -3.91% (p=0.000 n=10+10)
TimeFormat-8 826ns ± 2% 817ns ± 2% -1.04% (p=0.050 n=10+10)
[Geo mean] 145µs 143µs -1.79%
Change-Id: I5fc42087cee9b54ea414f8ef6d6d020b80eb5985
Reviewed-on: https://go-review.googlesource.com/42172
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
2017-04-28 18:02:00 -04:00
|
|
|
if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
|
2016-08-16 14:17:33 -04:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
v.reset(OpARM64MOVWload)
|
|
|
|
|
v.AuxInt = off1 + off2
|
|
|
|
|
v.Aux = sym
|
|
|
|
|
v.AddArg(ptr)
|
|
|
|
|
v.AddArg(mem)
|
|
|
|
|
return true
|
|
|
|
|
}
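	// Example: the rule above turns (MOVWload [4] (ADDconst [8] ptr) mem)
	// into (MOVWload [12] ptr mem). Since the assembler accepts large and
	// unaligned offsets (CL 42172), the fold is safe whenever the summed
	// offset fits in 32 bits and the base is not SB in shared-library mode.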
	// match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWload)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
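	// The rule above additionally merges the symbol recorded by MOVDaddr:
	// e.g. (MOVWload [4] {sym1} (MOVDaddr [8] {sym2} ptr) mem) becomes
	// (MOVWload [12] {mergeSym(sym1,sym2)} ptr mem) when canMergeSym allows it.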
	// match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
	// result: (MOVDconst [0])
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWstorezero {
			break
		}
		off2 := v_1.AuxInt
		sym2 := v_1.Aux
		ptr2 := v_1.Args[0]
		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
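	// The rule above is store-to-load forwarding for zeroed memory: a word
	// loaded from an address that a MOVWstorezero just wrote must be zero,
	// so the load folds to a constant.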
	return false
}
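// rewriteValueARM64_OpARM64MOVWreg_0 rewrites (MOVWreg x), the 32->64 bit
// sign extension, to a plain register move when x is already correctly
// extended: narrow loads and extension ops leave the upper bits in the
// state MOVWreg would produce, so re-extending adds nothing.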
func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
	// match: (MOVWreg x:(MOVBload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVBUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVHload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVHUload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
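	// Example: x := (MOVHload ptr mem) is already sign-extended to 64 bits,
	// so (MOVWreg x) carries no extra information and becomes (MOVDreg x),
	// a move that register allocation can usually coalesce away.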
	// match: (MOVWreg x:(MOVWload _ _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVWload {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVBreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVBUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVBUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVHreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	// match: (MOVWreg x:(MOVHUreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVHUreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
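	// Chained extensions collapse the same way: in (MOVWreg (MOVHreg x))
	// the inner op already produced a value in [-1<<15, 1<<15-1], which a
	// 32->64 bit sign extension cannot change.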
	// match: (MOVWreg x:(MOVWreg _))
	// cond:
	// result: (MOVDreg x)
	for {
		x := v.Args[0]
		if x.Op != OpARM64MOVWreg {
			break
		}
		v.reset(OpARM64MOVDreg)
		v.AddArg(x)
		return true
	}
	return false
}
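// rewriteValueARM64_OpARM64MOVWreg_10 handles the remaining MOVWreg rule:
// sign extension of a constant is evaluated at compile time.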
func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
	// match: (MOVWreg (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(int32(c))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int32(c))
		return true
	}
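	// Example: c = 0xfffffffff (36 set bits) folds to int64(int32(c)) = -1,
	// exactly the value a runtime sign extension of the low word would give.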
	return false
}
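// rewriteValueARM64_OpARM64MOVWstore_0 mirrors the MOVWload rules for word
// stores: it folds address arithmetic into the store and drops redundant
// extensions of the stored value.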
func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWstore [off1+off2] {sym} ptr val mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWstore)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWstore)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
	// cond:
	// result: (MOVWstorezero [off] {sym} ptr mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		mem := v.Args[2]
		v.reset(OpARM64MOVWstorezero)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
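	// Storing constant zero uses the dedicated MOVWstorezero op above, which
	// is assumed here to be emitted as a store of the zero register ZR, so
	// no scratch register is needed to materialize the value.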
	// match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
	// cond:
	// result: (MOVWstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVWstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
	// match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
	// cond:
	// result: (MOVWstore [off] {sym} ptr x mem)
	for {
		off := v.AuxInt
		sym := v.Aux
		ptr := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		x := v_1.Args[0]
		mem := v.Args[2]
		v.reset(OpARM64MOVWstore)
		v.AuxInt = off
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
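	// A word store writes only the low 32 bits of val, so the two rules
	// above drop a MOVWreg or MOVWUreg of the stored value: the extension
	// only alters bits the store never reads.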
	return false
}
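// rewriteValueARM64_OpARM64MOVWstorezero_0 applies the same offset folding
// to zero stores that rewriteValueARM64_OpARM64MOVWstore_0 applies to
// ordinary word stores.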
func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
|
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	// match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWstorezero [off1+off2] {sym} ptr mem)
	for {
		off1 := v.AuxInt
		sym := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		off2 := v_0.AuxInt
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWstorezero)
		v.AuxInt = off1 + off2
		v.Aux = sym
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
	// result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
	for {
		off1 := v.AuxInt
		sym1 := v.Aux
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDaddr {
			break
		}
		off2 := v_0.AuxInt
		sym2 := v_0.Aux
		ptr := v_0.Args[0]
		mem := v.Args[1]
		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
			break
		}
		v.reset(OpARM64MOVWstorezero)
		v.AuxInt = off1 + off2
		v.Aux = mergeSym(sym1, sym2)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
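// rewriteValueARM64_OpARM64MUL_0 strength-reduces 64-bit multiplies by small
// constants: multiplying by -1 becomes NEG, by 0 folds to a zero constant, by
// 1 becomes a copy, and constants of the form 2^n or 2^n+1 become a shift or
// a shifted add.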
func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
	// match: (MUL x (MOVDconst [-1]))
	// cond:
	// result: (NEG x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != -1 {
			break
		}
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
	// match: (MUL (MOVDconst [-1]) x)
	// cond:
	// result: (NEG x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		if v_0.AuxInt != -1 {
			break
		}
		x := v.Args[1]
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
	// match: (MUL _ (MOVDconst [0]))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 0 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (MUL (MOVDconst [0]) _)
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		if v_0.AuxInt != 0 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (MUL x (MOVDconst [1]))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 1 {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (MUL (MOVDconst [1]) x)
	// cond:
	// result: x
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		if v_0.AuxInt != 1 {
			break
		}
		x := v.Args[1]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: isPowerOfTwo(c)
	// result: (SLLconst [log2(c)] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c)
	// result: (SLLconst [log2(c)] x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: isPowerOfTwo(c-1) && c >= 3
	// result: (ADDshiftLL x x [log2(c-1)])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c-1) && c >= 3) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c - 1)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c-1) && c >= 3
	// result: (ADDshiftLL x x [log2(c-1)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c-1) && c >= 3) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c - 1)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	return false
}
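// rewriteValueARM64_OpARM64MUL_10 continues the MUL strength reduction:
// constants of the form 2^n-1 use a negate feeding a shifted add, and
// multiples of 3, 5, 7, or 9 times a power of two use a shifted add followed
// by a left shift.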
func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MUL x (MOVDconst [c]))
	// cond: isPowerOfTwo(c+1) && c >= 7
	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c+1) && c >= 7) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c + 1)
		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c+1) && c >= 7
	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c+1) && c >= 7) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c + 1)
		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: c%3 == 0 && isPowerOfTwo(c/3)
	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 1
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: c%3 == 0 && isPowerOfTwo(c/3)
	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 1
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: c%5 == 0 && isPowerOfTwo(c/5)
	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 2
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: c%5 == 0 && isPowerOfTwo(c/5)
	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 2
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: c%7 == 0 && isPowerOfTwo(c/7)
	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%7 == 0 && isPowerOfTwo(c/7)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 7)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: c%7 == 0 && isPowerOfTwo(c/7)
	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%7 == 0 && isPowerOfTwo(c/7)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 7)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL x (MOVDconst [c]))
	// cond: c%9 == 0 && isPowerOfTwo(c/9)
	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MUL (MOVDconst [c]) x)
	// cond: c%9 == 0 && isPowerOfTwo(c/9)
	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	return false
}
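// rewriteValueARM64_OpARM64MUL_20 folds a multiply of two constants into a
// single MOVDconst, in either argument order.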
func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
	// match: (MUL (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [c*d])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c * d
		return true
	}
	// match: (MUL (MOVDconst [d]) (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [c*d])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c * d
		return true
	}
	return false
}
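// rewriteValueARM64_OpARM64MULW_0 applies the same strength reduction to
// 32-bit multiplies; the conditions test int32(c) because MULW only observes
// the low 32 bits of the constant.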
func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
	// match: (MULW x (MOVDconst [c]))
	// cond: int32(c)==-1
	// result: (NEG x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(int32(c) == -1) {
			break
		}
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: int32(c)==-1
	// result: (NEG x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(int32(c) == -1) {
			break
		}
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
	// match: (MULW _ (MOVDconst [c]))
	// cond: int32(c)==0
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(int32(c) == 0) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (MULW (MOVDconst [c]) _)
	// cond: int32(c)==0
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		if !(int32(c) == 0) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: int32(c)==1
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(int32(c) == 1) {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: int32(c)==1
	// result: x
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(int32(c) == 1) {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: isPowerOfTwo(c)
	// result: (SLLconst [log2(c)] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c)
	// result: (SLLconst [log2(c)] x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
	// result: (ADDshiftLL x x [log2(c-1)])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c - 1)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
	// result: (ADDshiftLL x x [log2(c-1)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c - 1)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	return false
}
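// rewriteValueARM64_OpARM64MULW_10 continues the MULW strength reduction for
// constants of the form 2^n-1 and for multiples of 3, 5, 7, or 9 times a
// power of two; is32Bit(c) additionally guards the cases that divide the
// constant.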
func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MULW x (MOVDconst [c]))
	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c + 1)
		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
			break
		}
		v.reset(OpARM64ADDshiftLL)
		v.AuxInt = log2(c + 1)
		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 1
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 1
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 2
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 2
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 7)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 7)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW x (MOVDconst [c]))
	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULW (MOVDconst [c]) x)
	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
		v0.AuxInt = 3
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	return false
}
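// rewriteValueARM64_OpARM64MULW_20 folds a 32-bit multiply of two constants,
// truncating the product to int32 before widening it into the AuxInt.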
func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
	// match: (MULW (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(int32(c)*int32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int32(c) * int32(d))
		return true
	}
	// match: (MULW (MOVDconst [d]) (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [int64(int32(c)*int32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(int32(c) * int32(d))
		return true
	}
	return false
}
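// rewriteValueARM64_OpARM64MVN_0 folds bitwise NOT of a constant.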
func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
	// match: (MVN (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [^c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = ^c
		return true
	}
	return false
}
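// rewriteValueARM64_OpARM64NEG_0 folds negation of a constant.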
func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
	// match: (NEG (MOVDconst [c]))
	// cond:
	// result: (MOVDconst [-c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = -c
		return true
	}
	return false
}
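// rewriteValueARM64_OpARM64NotEqual_0 evaluates NotEqual of a known flags
// value to a constant 0 or 1, and drops InvertFlags, since swapping the
// comparison operands does not change inequality.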
func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
	// match: (NotEqual (FlagEQ))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagEQ {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (NotEqual (FlagLT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (NotEqual (FlagLT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagLT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (NotEqual (FlagGT_ULT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_ULT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (NotEqual (FlagGT_UGT))
	// cond:
	// result: (MOVDconst [1])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64FlagGT_UGT {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 1
		return true
	}
	// match: (NotEqual (InvertFlags x))
	// cond:
	// result: (NotEqual x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64InvertFlags {
			break
		}
		x := v_0.Args[0]
		v.reset(OpARM64NotEqual)
		v.AddArg(x)
		return true
	}
	return false
}
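// rewriteValueARM64_OpARM64OR_0 rewrites OR with a constant operand into
// ORconst, collapses (OR x x) to x, folds a shifted operand into the
// ORshift* forms, and recognizes four adjacent little-endian byte loads
// ORed together so they can be replaced by a single unsigned word load.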
func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
|
2017-03-30 03:30:22 +00:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
|
|
|
|
// match: (OR x (MOVDconst [c]))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORconst [c] x)
|
2016-07-22 06:41:14 -04:00
|
|
|
for {
|
2016-08-10 13:24:03 -04:00
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
2017-03-29 18:06:04 +00:00
|
|
|
if v_1.Op != OpARM64MOVDconst {
|
2016-08-03 09:56:36 -04:00
|
|
|
break
|
|
|
|
|
}
|
2016-08-10 13:24:03 -04:00
|
|
|
c := v_1.AuxInt
|
2017-03-30 03:30:22 +00:00
|
|
|
v.reset(OpARM64ORconst)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (OR (MOVDconst [c]) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORconst [c] x)
|
|
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64MOVDconst {
|
2016-08-10 13:24:03 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
c := v_0.AuxInt
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64ORconst)
|
|
|
|
|
v.AuxInt = c
|
2017-03-25 15:05:42 -07:00
|
|
|
v.AddArg(x)
|
2016-07-22 06:41:14 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR x x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
2016-07-22 06:41:14 -04:00
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
2017-03-30 03:30:22 +00:00
|
|
|
if x != v.Args[1] {
|
2017-03-29 18:06:04 +00:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
2016-08-10 13:24:03 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR x (SLLconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftLL x y [c])
|
2016-08-10 13:24:03 -04:00
|
|
|
for {
|
2017-03-29 18:06:04 +00:00
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
2017-03-30 03:30:22 +00:00
|
|
|
if v_1.Op != OpARM64SLLconst {
|
2016-08-10 13:24:03 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-29 18:06:04 +00:00
|
|
|
c := v_1.AuxInt
|
2017-03-30 03:30:22 +00:00
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64ORshiftLL)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
2016-07-22 06:41:14 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR (SLLconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftLL x y [c])
|
2016-07-22 06:41:14 -04:00
|
|
|
for {
|
2017-03-30 03:30:22 +00:00
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
if v_0.Op != OpARM64SLLconst {
|
2017-03-29 18:06:04 +00:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
c := v_0.AuxInt
|
|
|
|
|
y := v_0.Args[0]
|
|
|
|
|
x := v.Args[1]
|
|
|
|
|
v.reset(OpARM64ORshiftLL)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
2016-07-22 06:41:14 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR x (SRLconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftRL x y [c])
|
2016-08-10 13:24:03 -04:00
|
|
|
for {
|
2017-03-29 18:06:04 +00:00
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
2017-03-30 03:30:22 +00:00
|
|
|
if v_1.Op != OpARM64SRLconst {
|
2016-08-10 13:24:03 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-29 18:06:04 +00:00
|
|
|
c := v_1.AuxInt
|
2017-03-30 03:30:22 +00:00
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64ORshiftRL)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
2016-08-10 13:24:03 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR (SRLconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftRL x y [c])
|
2016-08-10 13:24:03 -04:00
|
|
|
for {
|
2017-03-29 18:06:04 +00:00
|
|
|
v_0 := v.Args[0]
|
2017-03-30 03:30:22 +00:00
|
|
|
if v_0.Op != OpARM64SRLconst {
|
2016-08-10 13:24:03 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-29 18:06:04 +00:00
|
|
|
c := v_0.AuxInt
|
2017-03-30 03:30:22 +00:00
|
|
|
y := v_0.Args[0]
|
2017-03-29 18:06:04 +00:00
|
|
|
x := v.Args[1]
|
2017-03-30 03:30:22 +00:00
|
|
|
v.reset(OpARM64ORshiftRL)
|
|
|
|
|
v.AuxInt = c
|
2017-03-29 18:06:04 +00:00
|
|
|
v.AddArg(x)
|
2017-03-30 03:30:22 +00:00
|
|
|
v.AddArg(y)
|
2017-03-29 18:06:04 +00:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR x (SRAconst [c] y))
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftRA x y [c])
|
2017-03-29 18:06:04 +00:00
|
|
|
for {
|
2017-03-30 03:30:22 +00:00
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64SRAconst {
|
2016-08-03 09:56:36 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
c := v_1.AuxInt
|
|
|
|
|
y := v_1.Args[0]
|
|
|
|
|
v.reset(OpARM64ORshiftRA)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
v.AddArg(y)
|
2017-03-29 18:06:04 +00:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (OR (SRAconst [c] y) x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (ORshiftRA x y [c])
|
2017-03-29 18:06:04 +00:00
|
|
|
for {
|
|
|
|
|
v_0 := v.Args[0]
|
2017-03-30 03:30:22 +00:00
|
|
|
if v_0.Op != OpARM64SRAconst {
|
2016-07-22 06:41:14 -04:00
|
|
|
break
|
|
|
|
|
}
|
2017-03-29 18:06:04 +00:00
|
|
|
c := v_0.AuxInt
|
2017-03-30 03:30:22 +00:00
|
|
|
y := v_0.Args[0]
|
2017-03-29 18:06:04 +00:00
|
|
|
x := v.Args[1]
|
2017-03-30 03:30:22 +00:00
|
|
|
v.reset(OpARM64ORshiftRA)
|
|
|
|
|
v.AuxInt = c
|
2017-03-29 18:06:04 +00:00
|
|
|
v.AddArg(x)
|
2017-03-30 03:30:22 +00:00
|
|
|
v.AddArg(y)
|
2017-03-29 18:06:04 +00:00
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
	// match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
	for {
		t := v.Type
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		s0 := o1.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 24 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i3 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o1.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i2 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o0.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i1 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := v.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i0 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3)
		v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v0.Aux = s
		v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v1.AuxInt = i0
		v1.AddArg(p)
		v0.AddArg(v1)
		v0.AddArg(mem)
		return true
	}
	return false
}
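// The rules for OpARM64OR continue below; the generator splits each op's
// rule set across numbered helper functions (_0, _10, ...) so no single
// function grows too large.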
func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
	b := v.Block
	_ = b
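	// The first rule here is the commuted form of the 32-bit merge rule at
	// the end of rewriteValueARM64_OpARM64OR_0: the unshifted byte load
	// appears as the first OR operand instead of the second.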
	// match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
	for {
		t := v.Type
		y3 := v.Args[0]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i0 := x3.AuxInt
		s := x3.Aux
		p := x3.Args[0]
		mem := x3.Args[1]
		o0 := v.Args[1]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		s0 := o1.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 24 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i3 := x0.AuxInt
		if x0.Aux != s {
			break
		}
		if p != x0.Args[0] {
			break
		}
		if mem != x0.Args[1] {
			break
		}
		y1 := o1.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i2 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o0.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i1 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3)
		v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v0.Aux = s
		v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v1.AuxInt = i0
		v1.AddArg(p)
		v0.AddArg(v1)
		v0.AddArg(mem)
		return true
	}
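	// The 64-bit variants below match eight adjacent byte loads combined by
	// an ORshiftLL chain with shifts 8..56 and rewrite the tree to a single
	// MOVDload followed by a REV byte reversal.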
	// match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 24 {
			break
		}
		o3 := o2.Args[0]
		if o3.Op != OpARM64ORshiftLL {
			break
		}
		if o3.AuxInt != 32 {
			break
		}
		o4 := o3.Args[0]
		if o4.Op != OpARM64ORshiftLL {
			break
		}
		if o4.AuxInt != 40 {
			break
		}
		o5 := o4.Args[0]
		if o5.Op != OpARM64ORshiftLL {
			break
		}
		if o5.AuxInt != 48 {
			break
		}
		s0 := o5.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 56 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i7 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o5.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i6 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o4.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i5 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o3.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i4 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := o2.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i3 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		y5 := o1.Args[1]
		if y5.Op != OpARM64MOVDnop {
			break
		}
		x5 := y5.Args[0]
		if x5.Op != OpARM64MOVBUload {
			break
		}
		i2 := x5.AuxInt
		if x5.Aux != s {
			break
		}
		if p != x5.Args[0] {
			break
		}
		if mem != x5.Args[1] {
			break
		}
		y6 := o0.Args[1]
		if y6.Op != OpARM64MOVDnop {
			break
		}
		x6 := y6.Args[0]
		if x6.Op != OpARM64MOVBUload {
			break
		}
		i1 := x6.AuxInt
		if x6.Aux != s {
			break
		}
		if p != x6.Args[0] {
			break
		}
		if mem != x6.Args[1] {
			break
		}
		y7 := v.Args[1]
		if y7.Op != OpARM64MOVDnop {
			break
		}
		x7 := y7.Args[0]
		if x7.Op != OpARM64MOVBUload {
			break
		}
		i0 := x7.AuxInt
		if x7.Aux != s {
			break
		}
		if p != x7.Args[0] {
			break
		}
		if mem != x7.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
		v0 := b.NewValue0(v.Pos, OpARM64REV, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	// match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		y7 := v.Args[0]
		if y7.Op != OpARM64MOVDnop {
			break
		}
		x7 := y7.Args[0]
		if x7.Op != OpARM64MOVBUload {
			break
		}
		i0 := x7.AuxInt
		s := x7.Aux
		p := x7.Args[0]
		mem := x7.Args[1]
		o0 := v.Args[1]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 24 {
			break
		}
		o3 := o2.Args[0]
		if o3.Op != OpARM64ORshiftLL {
			break
		}
		if o3.AuxInt != 32 {
			break
		}
		o4 := o3.Args[0]
		if o4.Op != OpARM64ORshiftLL {
			break
		}
		if o4.AuxInt != 40 {
			break
		}
		o5 := o4.Args[0]
		if o5.Op != OpARM64ORshiftLL {
			break
		}
		if o5.AuxInt != 48 {
			break
		}
		s0 := o5.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 56 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i7 := x0.AuxInt
		if x0.Aux != s {
			break
		}
		if p != x0.Args[0] {
			break
		}
		if mem != x0.Args[1] {
			break
		}
		y1 := o5.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i6 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o4.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i5 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o3.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i4 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := o2.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i3 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		y5 := o1.Args[1]
		if y5.Op != OpARM64MOVDnop {
			break
		}
		x5 := y5.Args[0]
		if x5.Op != OpARM64MOVBUload {
			break
		}
		i2 := x5.AuxInt
		if x5.Aux != s {
			break
		}
		if p != x5.Args[0] {
			break
		}
		if mem != x5.Args[1] {
			break
		}
		y6 := o0.Args[1]
		if y6.Op != OpARM64MOVDnop {
			break
		}
		x6 := y6.Args[0]
		if x6.Op != OpARM64MOVBUload {
			break
		}
		i1 := x6.AuxInt
		if x6.Aux != s {
			break
		}
		if p != x6.Args[0] {
			break
		}
		if mem != x6.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
		v0 := b.NewValue0(v.Pos, OpARM64REV, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
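	// 32-bit byte-reversed variant: here the byte at the lowest offset i0 is
	// the one shifted into the top byte (SLLconst [24]), i.e. the expression
	// computes the big-endian interpretation of the four bytes, so the
	// rewrite loads the word and byte-swaps it with REVW. Illustrative
	// source shape (an assumption about typical input, e.g. big-endian
	// decoding done by hand or via encoding/binary):
	//
	//	x := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24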
	// match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		s0 := o1.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 24 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i0 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o1.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o0.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i2 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := v.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i3 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3)
		v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	// match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		y3 := v.Args[0]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i3 := x3.AuxInt
		s := x3.Aux
		p := x3.Args[0]
		mem := x3.Args[1]
		o0 := v.Args[1]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		s0 := o1.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 24 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i0 := x0.AuxInt
		if x0.Aux != s {
			break
		}
		if p != x0.Args[0] {
			break
		}
		if mem != x0.Args[1] {
			break
		}
		y1 := o1.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o0.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i2 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3)
		v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
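	// 64-bit byte-reversed variant: the byte at the lowest offset i0 is
	// shifted into the top byte (SLLconst [56]), so the eight loads compute
	// the big-endian interpretation; the rewrite is a MOVDload plus REV.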
	// match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 24 {
			break
		}
		o3 := o2.Args[0]
		if o3.Op != OpARM64ORshiftLL {
			break
		}
		if o3.AuxInt != 32 {
			break
		}
		o4 := o3.Args[0]
		if o4.Op != OpARM64ORshiftLL {
			break
		}
		if o4.AuxInt != 40 {
			break
		}
		o5 := o4.Args[0]
		if o5.Op != OpARM64ORshiftLL {
			break
		}
		if o5.AuxInt != 48 {
			break
		}
		s0 := o5.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 56 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i0 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o5.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o4.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i2 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o3.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i3 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := o2.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i4 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		y5 := o1.Args[1]
		if y5.Op != OpARM64MOVDnop {
			break
		}
		x5 := y5.Args[0]
		if x5.Op != OpARM64MOVBUload {
			break
		}
		i5 := x5.AuxInt
		if x5.Aux != s {
			break
		}
		if p != x5.Args[0] {
			break
		}
		if mem != x5.Args[1] {
			break
		}
		y6 := o0.Args[1]
		if y6.Op != OpARM64MOVDnop {
			break
		}
		x6 := y6.Args[0]
		if x6.Op != OpARM64MOVBUload {
			break
		}
		i6 := x6.AuxInt
		if x6.Aux != s {
			break
		}
		if p != x6.Args[0] {
			break
		}
		if mem != x6.Args[1] {
			break
		}
		y7 := v.Args[1]
		if y7.Op != OpARM64MOVDnop {
			break
		}
		x7 := y7.Args[0]
		if x7.Op != OpARM64MOVBUload {
			break
		}
		i7 := x7.AuxInt
		if x7.Aux != s {
			break
		}
		if p != x7.Args[0] {
			break
		}
		if mem != x7.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
		v0 := b.NewValue0(v.Pos, OpARM64REV, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	// match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		y7 := v.Args[0]
		if y7.Op != OpARM64MOVDnop {
			break
		}
		x7 := y7.Args[0]
		if x7.Op != OpARM64MOVBUload {
			break
		}
		i7 := x7.AuxInt
		s := x7.Aux
		p := x7.Args[0]
		mem := x7.Args[1]
		o0 := v.Args[1]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 8 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 16 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 24 {
			break
		}
		o3 := o2.Args[0]
		if o3.Op != OpARM64ORshiftLL {
			break
		}
		if o3.AuxInt != 32 {
			break
		}
		o4 := o3.Args[0]
		if o4.Op != OpARM64ORshiftLL {
			break
		}
		if o4.AuxInt != 40 {
			break
		}
		o5 := o4.Args[0]
		if o5.Op != OpARM64ORshiftLL {
			break
		}
		if o5.AuxInt != 48 {
			break
		}
		s0 := o5.Args[0]
		if s0.Op != OpARM64SLLconst {
			break
		}
		if s0.AuxInt != 56 {
			break
		}
		y0 := s0.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i0 := x0.AuxInt
		if x0.Aux != s {
			break
		}
		if p != x0.Args[0] {
			break
		}
		if mem != x0.Args[1] {
			break
		}
		y1 := o5.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o4.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i2 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o3.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i3 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := o2.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i4 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		y5 := o1.Args[1]
		if y5.Op != OpARM64MOVDnop {
			break
		}
		x5 := y5.Args[0]
		if x5.Op != OpARM64MOVBUload {
			break
		}
		i5 := x5.AuxInt
		if x5.Aux != s {
			break
		}
		if p != x5.Args[0] {
			break
		}
		if mem != x5.Args[1] {
			break
		}
		y6 := o0.Args[1]
		if y6.Op != OpARM64MOVDnop {
			break
		}
		x6 := y6.Args[0]
		if x6.Op != OpARM64MOVBUload {
			break
		}
		i6 := x6.AuxInt
		if x6.Aux != s {
			break
		}
		if p != x6.Args[0] {
			break
		}
		if mem != x6.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
		v0 := b.NewValue0(v.Pos, OpARM64REV, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	return false
}
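// Constant-folding rules for ORconst: OR with 0 is the identity, OR with -1
// saturates to -1, and constants are otherwise folded or merged into a
// single mask.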
func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
	// match: (ORconst [0] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != 0 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (ORconst [-1] _)
	// cond:
	// result: (MOVDconst [-1])
	for {
		if v.AuxInt != -1 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = -1
		return true
	}
	// match: (ORconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [c|d])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c | d
		return true
	}
	// match: (ORconst [c] (ORconst [d] x))
	// cond:
	// result: (ORconst [c|d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ORconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ORconst)
		v.AuxInt = c | d
		v.AddArg(x)
		return true
	}
	return false
}
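// Rules for ORshiftLL: fold constants through the shifted OR, recognize
// rotate idioms built from a matching shift pair, and merge loads
// incrementally when the low part is an already-merged wider load.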
func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ORshiftLL (MOVDconst [c]) x [d])
	// cond:
	// result: (ORconst [c] (SLLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ORshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (ORconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ORconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ORshiftLL x y:(SLLconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SLLconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
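	// Rotate recognition: an OR of a left shift by c with a right shift of
	// the same value by 64-c (or 32-c on the 32-bit form) is a rotate. A
	// sketch of matching source:
	//
	//	r := x<<3 | x>>61
	//
	// becomes a single RORconst [61] of x.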
	// match: (ORshiftLL [c] (SRLconst x [64-c]) x)
	// cond:
	// result: (RORconst [64-c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = 64 - c
		v.AddArg(x)
		return true
	}
	// match: (ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [32-c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		v_0_0 := v_0.Args[0]
		if v_0_0.Op != OpARM64MOVWUreg {
			break
		}
		x := v_0_0.Args[0]
		if x != v.Args[1] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = 32 - c
		v.AddArg(x)
		return true
	}
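	// Incremental load merging: these ORshiftLL rules extend an
	// already-merged low part (a byte, halfword, or word load) by the next
	// adjacent bytes, producing MOVHUload, MOVWUload, or MOVDload in turn.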
	// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
	// result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
	for {
		t := v.Type
		if v.AuxInt != 8 {
			break
		}
		y0 := v.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i0 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := v.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
			break
		}
		b = mergePoint(b, x0, x1)
		v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v0.Aux = s
		v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v1.AuxInt = i0
		v1.AddArg(p)
		v0.AddArg(v1)
		v0.AddArg(mem)
		return true
	}
	// match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
	// result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
	for {
		t := v.Type
		if v.AuxInt != 24 {
			break
		}
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 16 {
			break
		}
		x0 := o0.Args[0]
		if x0.Op != OpARM64MOVHUload {
			break
		}
		i0 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o0.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i2 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := v.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i3 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2)
		v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v0.Aux = s
		v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v1.AuxInt = i0
		v1.AddArg(p)
		v0.AddArg(v1)
		v0.AddArg(mem)
		return true
	}
	// match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
	// cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
	// result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
	for {
		t := v.Type
		if v.AuxInt != 56 {
			break
		}
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 48 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 40 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 32 {
			break
		}
		x0 := o2.Args[0]
		if x0.Op != OpARM64MOVWUload {
			break
		}
		i0 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o2.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i4 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o1.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i5 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o0.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i6 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := v.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i7 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v0.Aux = s
		v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
2017-03-30 03:30:22 +00:00
|
|
|
v1.AuxInt = i0
|
2016-08-16 14:17:33 -04:00
|
|
|
v1.AddArg(p)
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v0.AddArg(mem)
|
|
|
|
|
return true
|
|
|
|
|
}
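	// The remaining ORshiftLL rules handle bytes assembled in big-endian
	// order on this little-endian target: the bytes are merged into one
	// wide load followed by a byte reversal (REV16W, REVW, or REV for 2,
	// 4, and 8 bytes respectively). For example, an expression of the
	// form uint16(b[1]) | uint16(b[0])<<8 loads as a MOVHUload plus
	// REV16W.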
	// match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
	// result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
	for {
		t := v.Type
		if v.AuxInt != 8 {
			break
		}
		y0 := v.Args[0]
		if y0.Op != OpARM64MOVDnop {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVBUload {
			break
		}
		i1 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := v.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i0 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
			break
		}
		b = mergePoint(b, x0, x1)
		v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
		v1.AuxInt = i0
		v1.Aux = s
		v1.AddArg(p)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	// match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
	// result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		if v.AuxInt != 24 {
			break
		}
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 16 {
			break
		}
		y0 := o0.Args[0]
		if y0.Op != OpARM64REV16W {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVHUload {
			break
		}
		i2 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o0.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i1 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := v.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i0 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
			break
		}
		b = mergePoint(b, x0, x1, x2)
		v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	return false
}
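// The rule generator splits long per-op rule lists into numbered helper
// functions; _10 below continues the ORshiftLL rules begun in _0 above.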
func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
	// cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
	// result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
	for {
		t := v.Type
		if v.AuxInt != 56 {
			break
		}
		o0 := v.Args[0]
		if o0.Op != OpARM64ORshiftLL {
			break
		}
		if o0.AuxInt != 48 {
			break
		}
		o1 := o0.Args[0]
		if o1.Op != OpARM64ORshiftLL {
			break
		}
		if o1.AuxInt != 40 {
			break
		}
		o2 := o1.Args[0]
		if o2.Op != OpARM64ORshiftLL {
			break
		}
		if o2.AuxInt != 32 {
			break
		}
		y0 := o2.Args[0]
		if y0.Op != OpARM64REVW {
			break
		}
		x0 := y0.Args[0]
		if x0.Op != OpARM64MOVWUload {
			break
		}
		i4 := x0.AuxInt
		s := x0.Aux
		p := x0.Args[0]
		mem := x0.Args[1]
		y1 := o2.Args[1]
		if y1.Op != OpARM64MOVDnop {
			break
		}
		x1 := y1.Args[0]
		if x1.Op != OpARM64MOVBUload {
			break
		}
		i3 := x1.AuxInt
		if x1.Aux != s {
			break
		}
		if p != x1.Args[0] {
			break
		}
		if mem != x1.Args[1] {
			break
		}
		y2 := o1.Args[1]
		if y2.Op != OpARM64MOVDnop {
			break
		}
		x2 := y2.Args[0]
		if x2.Op != OpARM64MOVBUload {
			break
		}
		i2 := x2.AuxInt
		if x2.Aux != s {
			break
		}
		if p != x2.Args[0] {
			break
		}
		if mem != x2.Args[1] {
			break
		}
		y3 := o0.Args[1]
		if y3.Op != OpARM64MOVDnop {
			break
		}
		x3 := y3.Args[0]
		if x3.Op != OpARM64MOVBUload {
			break
		}
		i1 := x3.AuxInt
		if x3.Aux != s {
			break
		}
		if p != x3.Args[0] {
			break
		}
		if mem != x3.Args[1] {
			break
		}
		y4 := v.Args[1]
		if y4.Op != OpARM64MOVDnop {
			break
		}
		x4 := y4.Args[0]
		if x4.Op != OpARM64MOVBUload {
			break
		}
		i0 := x4.AuxInt
		if x4.Aux != s {
			break
		}
		if p != x4.Args[0] {
			break
		}
		if mem != x4.Args[1] {
			break
		}
		if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
			break
		}
		b = mergePoint(b, x0, x1, x2, x3, x4)
		v0 := b.NewValue0(v.Pos, OpARM64REV, t)
		v.reset(OpCopy)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
		v1.Aux = s
		v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
		v2.AuxInt = i0
		v2.AddArg(p)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v0.AddArg(v1)
		return true
	}
	return false
}
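// ORshiftRA rules: an OR whose second operand carries a built-in
// arithmetic right shift. A constant operand on either side folds away,
// since shifting a constant just yields another constant.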
func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ORshiftRA (MOVDconst [c]) x [d])
	// cond:
	// result: (ORconst [c] (SRAconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ORshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (ORconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ORconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ORshiftRA x y:(SRAconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SRAconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	return false
}
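// ORshiftRL rules: besides the usual constant folding, the last two
// rules recognize rotates. x<<(64-c) | x>>c touches each bit exactly
// once, so it is a rotate right by c (RORconst); the 32-bit variant
// checks t.Size() == 4 and uses RORWconst.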
func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (ORshiftRL (MOVDconst [c]) x [d])
	// cond:
	// result: (ORconst [c] (SRLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64ORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (ORshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (ORconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64ORconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (ORshiftRL x y:(SRLconst x [c]) [d])
	// cond: c==d
	// result: y
	for {
		d := v.AuxInt
		x := v.Args[0]
		y := v.Args[1]
		if y.Op != OpARM64SRLconst {
			break
		}
		c := y.AuxInt
		if x != y.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpCopy)
		v.Type = y.Type
		v.AddArg(y)
		return true
	}
	// match: (ORshiftRL [c] (SLLconst x [64-c]) x)
	// cond:
	// result: (RORconst [ c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [ c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		x := v_0.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		if x != v_1.Args[0] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	return false
}
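// Variable shifts by a constant amount become constant-shift ops. The
// count is masked with &63 because only the low six bits of the shift
// amount matter for a 64-bit shift on ARM64.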
func rewriteValueARM64_OpARM64SLL_0(v *Value) bool {
	// match: (SLL x (MOVDconst [c]))
	// cond:
	// result: (SLLconst x [c&63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SLLconst)
		v.AuxInt = c & 63
		v.AddArg(x)
		return true
	}
	return false
}
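// SLLconst folds constant operands and simplifies shift pairs:
// (x >> c) << c just clears the low c bits, so it becomes an AND with
// the mask ^(1<<c - 1) (for c == 3, x>>3<<3 == x &^ 7).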
func rewriteValueARM64_OpARM64SLLconst_0(v *Value) bool {
	// match: (SLLconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(d)<<uint64(c)])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(d) << uint64(c)
		return true
	}
	// match: (SLLconst [c] (SRLconst [c] x))
	// cond: 0 < c && c < 64
	// result: (ANDconst [^(1<<uint(c)-1)] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != c {
			break
		}
		x := v_0.Args[0]
		if !(0 < c && c < 64) {
			break
		}
		v.reset(OpARM64ANDconst)
		v.AuxInt = ^(1<<uint(c) - 1)
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64SRA_0(v *Value) bool {
	// match: (SRA x (MOVDconst [c]))
	// cond:
	// result: (SRAconst x [c&63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SRAconst)
		v.AuxInt = c & 63
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64SRAconst_0(v *Value) bool {
	// match: (SRAconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(d)>>uint64(c)])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(d) >> uint64(c)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64SRL_0(v *Value) bool {
	// match: (SRL x (MOVDconst [c]))
	// cond:
	// result: (SRLconst x [c&63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SRLconst)
		v.AuxInt = c & 63
		v.AddArg(x)
		return true
	}
	return false
}
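// SRLconst mirrors SLLconst: (x << c) >> c (unsigned) keeps only the
// low 64-c bits, i.e. an AND with the mask 1<<(64-c) - 1.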
func rewriteValueARM64_OpARM64SRLconst_0(v *Value) bool {
	// match: (SRLconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(uint64(d)>>uint64(c))])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint64(d) >> uint64(c))
		return true
	}
	// match: (SRLconst [c] (SLLconst [c] x))
	// cond: 0 < c && c < 64
	// result: (ANDconst [1<<uint(64-c)-1] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != c {
			break
		}
		x := v_0.Args[0]
		if !(0 < c && c < 64) {
			break
		}
		v.reset(OpARM64ANDconst)
		v.AuxInt = 1<<uint(64-c) - 1
		v.AddArg(x)
		return true
	}
	return false
}
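// SUB rules: fold constants, cancel x-x to zero, reassociate nested
// subtractions (x - (y - z) -> (x + z) - y and (x - y) - z ->
// x - (y + z)), and absorb a constant-shifted operand into the
// SUBshift* forms, where the shift comes free in the SUB instruction.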
func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (SUB x (MOVDconst [c]))
	// cond:
	// result: (SUBconst [c] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SUBconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (SUB x x)
	// cond:
	// result: (MOVDconst [0])
	for {
		x := v.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (SUB x (SUB y z))
	// cond:
	// result: (SUB (ADD <v.Type> x z) y)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SUB {
			break
		}
		y := v_1.Args[0]
		z := v_1.Args[1]
		v.reset(OpARM64SUB)
		v0 := b.NewValue0(v.Pos, OpARM64ADD, v.Type)
		v0.AddArg(x)
		v0.AddArg(z)
		v.AddArg(v0)
		v.AddArg(y)
		return true
	}
	// match: (SUB (SUB x y) z)
	// cond:
	// result: (SUB x (ADD <y.Type> y z))
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SUB {
			break
		}
		x := v_0.Args[0]
		y := v_0.Args[1]
		z := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpARM64ADD, y.Type)
		v0.AddArg(y)
		v0.AddArg(z)
		v.AddArg(v0)
		return true
	}
	// match: (SUB x (SLLconst [c] y))
	// cond:
	// result: (SUBshiftLL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64SUBshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (SUB x (SRLconst [c] y))
	// cond:
	// result: (SUBshiftRL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64SUBshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (SUB x (SRAconst [c] y))
	// cond:
	// result: (SUBshiftRA x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64SUBshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
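// SUBconst simplifications: subtracting 0 is the identity, and chained
// SUBconst/ADDconst collapse into a single ADDconst of -c-d or -c+d.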
func rewriteValueARM64_OpARM64SUBconst_0(v *Value) bool {
	// match: (SUBconst [0] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != 0 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (SUBconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [d-c])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = d - c
		return true
	}
	// match: (SUBconst [c] (SUBconst [d] x))
	// cond:
	// result: (ADDconst [-c-d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SUBconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ADDconst)
		v.AuxInt = -c - d
		v.AddArg(x)
		return true
	}
	// match: (SUBconst [c] (ADDconst [d] x))
	// cond:
	// result: (ADDconst [-c+d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64ADDconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64ADDconst)
		v.AuxInt = -c + d
		v.AddArg(x)
		return true
	}
	return false
}
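// The three SUBshift* helpers fold a constant shifted operand into a
// plain SUBconst; each also carries a cancellation rule (guarded by
// c==d) that reduces a self-referential shifted form to constant zero.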
func rewriteValueARM64_OpARM64SUBshiftLL_0(v *Value) bool {
	// match: (SUBshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (SUBconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SUBconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (SUBshiftLL x (SLLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64SUBshiftRA_0(v *Value) bool {
	// match: (SUBshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (SUBconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SUBconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (SUBshiftRA x (SRAconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64SUBshiftRL_0(v *Value) bool {
	// match: (SUBshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (SUBconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64SUBconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (SUBshiftRL x (SRLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
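// Unsigned division is strength-reduced: dividing by 1 is the identity
// and dividing by a power of two is a logical right shift, e.g. x/8
// becomes x>>3 via SRLconst [log2(8)]. Two constants fold outright.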
func rewriteValueARM64_OpARM64UDIV_0(v *Value) bool {
	// match: (UDIV x (MOVDconst [1]))
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 1 {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (UDIV x (MOVDconst [c]))
	// cond: isPowerOfTwo(c)
	// result: (SRLconst [log2(c)] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (UDIV (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(uint64(c)/uint64(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint64(c) / uint64(d))
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64UDIVW_0(v *Value) bool {
	// match: (UDIVW x (MOVDconst [c]))
	// cond: uint32(c)==1
	// result: x
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint32(c) == 1) {
			break
		}
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (UDIVW x (MOVDconst [c]))
	// cond: isPowerOfTwo(c) && is32Bit(c)
	// result: (SRLconst [log2(c)] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = log2(c)
		v.AddArg(x)
		return true
	}
	// match: (UDIVW (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(uint32(c)/uint32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint32(c) / uint32(d))
		return true
	}
	return false
}
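// Unsigned modulus by a power of two is a bit mask: x % 8 == x & 7,
// hence ANDconst [c-1]. Modulus by 1 is always zero.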
func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool {
	// match: (UMOD _ (MOVDconst [1]))
	// cond:
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		if v_1.AuxInt != 1 {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (UMOD x (MOVDconst [c]))
	// cond: isPowerOfTwo(c)
	// result: (ANDconst [c-1] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c)) {
			break
		}
		v.reset(OpARM64ANDconst)
		v.AuxInt = c - 1
		v.AddArg(x)
		return true
	}
	// match: (UMOD (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(uint64(c)%uint64(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint64(c) % uint64(d))
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64UMODW_0(v *Value) bool {
	// match: (UMODW _ (MOVDconst [c]))
	// cond: uint32(c)==1
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint32(c) == 1) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (UMODW x (MOVDconst [c]))
	// cond: isPowerOfTwo(c) && is32Bit(c)
	// result: (ANDconst [c-1] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(isPowerOfTwo(c) && is32Bit(c)) {
			break
		}
		v.reset(OpARM64ANDconst)
		v.AuxInt = c - 1
		v.AddArg(x)
		return true
	}
	// match: (UMODW (MOVDconst [c]) (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [int64(uint32(c)%uint32(d))])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		d := v_1.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = int64(uint32(c) % uint32(d))
		return true
	}
	return false
}
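// XOR rules mirror the OR/AND pattern: constants fold to XORconst
// (XOR is commutative, so both operand orders are matched), x^x is
// zero, and a constant-shifted operand is absorbed into XORshift*.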
func rewriteValueARM64_OpARM64XOR_0(v *Value) bool {
	// match: (XOR x (MOVDconst [c]))
	// cond:
	// result: (XORconst [c] x)
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64XORconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (XOR (MOVDconst [c]) x)
	// cond:
	// result: (XORconst [c] x)
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64XORconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (XOR x x)
	// cond:
	// result: (MOVDconst [0])
	for {
		x := v.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (XOR x (SLLconst [c] y))
	// cond:
	// result: (XORshiftLL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64XORshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (XOR (SLLconst [c] y) x)
	// cond:
	// result: (XORshiftLL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64XORshiftLL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (XOR x (SRLconst [c] y))
	// cond:
	// result: (XORshiftRL x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64XORshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (XOR (SRLconst [c] y) x)
	// cond:
	// result: (XORshiftRL x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64XORshiftRL)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (XOR x (SRAconst [c] y))
	// cond:
	// result: (XORshiftRA x y [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		y := v_1.Args[0]
		v.reset(OpARM64XORshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (XOR (SRAconst [c] y) x)
	// cond:
	// result: (XORshiftRA x y [c])
	for {
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRAconst {
			break
		}
		c := v_0.AuxInt
		y := v_0.Args[0]
		x := v.Args[1]
		v.reset(OpARM64XORshiftRA)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64XORconst_0(v *Value) bool {
	// match: (XORconst [0] x)
	// cond:
	// result: x
	for {
		if v.AuxInt != 0 {
			break
		}
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
	// match: (XORconst [-1] x)
	// cond:
	// result: (MVN x)
	for {
		if v.AuxInt != -1 {
			break
		}
		x := v.Args[0]
		v.reset(OpARM64MVN)
		v.AddArg(x)
		return true
	}
	// match: (XORconst [c] (MOVDconst [d]))
	// cond:
	// result: (MOVDconst [c^d])
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		d := v_0.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = c ^ d
		return true
	}
	// match: (XORconst [c] (XORconst [d] x))
	// cond:
	// result: (XORconst [c^d] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64XORconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		v.reset(OpARM64XORconst)
		v.AuxInt = c ^ d
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64XORshiftLL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (XORshiftLL (MOVDconst [c]) x [d])
	// cond:
	// result: (XORconst [c] (SLLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64XORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (XORshiftLL x (MOVDconst [c]) [d])
	// cond:
	// result: (XORconst x [int64(uint64(c)<<uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64XORconst)
		v.AuxInt = int64(uint64(c) << uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (XORshiftLL x (SLLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SLLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (XORshiftLL [c] (SRLconst x [64-c]) x)
	// cond:
	// result: (RORconst [64-c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = 64 - c
		v.AddArg(x)
		return true
	}
	// match: (XORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [32-c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SRLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		v_0_0 := v_0.Args[0]
		if v_0_0.Op != OpARM64MOVWUreg {
			break
		}
		x := v_0_0.Args[0]
		if x != v.Args[1] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = 32 - c
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64XORshiftRA_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (XORshiftRA (MOVDconst [c]) x [d])
	// cond:
	// result: (XORconst [c] (SRAconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64XORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (XORshiftRA x (MOVDconst [c]) [d])
	// cond:
	// result: (XORconst x [int64(int64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64XORconst)
		v.AuxInt = int64(int64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (XORshiftRA x (SRAconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRAconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	return false
}
func rewriteValueARM64_OpARM64XORshiftRL_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (XORshiftRL (MOVDconst [c]) x [d])
	// cond:
	// result: (XORconst [c] (SRLconst <x.Type> x [d]))
	for {
		d := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64MOVDconst {
			break
		}
		c := v_0.AuxInt
		x := v.Args[1]
		v.reset(OpARM64XORconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
		v0.AuxInt = d
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (XORshiftRL x (MOVDconst [c]) [d])
	// cond:
	// result: (XORconst x [int64(uint64(c)>>uint64(d))])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		v.reset(OpARM64XORconst)
		v.AuxInt = int64(uint64(c) >> uint64(d))
		v.AddArg(x)
		return true
	}
	// match: (XORshiftRL x (SRLconst x [c]) [d])
	// cond: c==d
	// result: (MOVDconst [0])
	for {
		d := v.AuxInt
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64SRLconst {
			break
		}
		c := v_1.AuxInt
		if x != v_1.Args[0] {
			break
		}
		if !(c == d) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (XORshiftRL [c] (SLLconst x [64-c]) x)
	// cond:
	// result: (RORconst [ c] x)
	for {
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 64-c {
			break
		}
		x := v_0.Args[0]
		if x != v.Args[1] {
			break
		}
		v.reset(OpARM64RORconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
	// cond: c < 32 && t.Size() == 4
	// result: (RORWconst [ c] x)
	for {
		t := v.Type
		c := v.AuxInt
		v_0 := v.Args[0]
		if v_0.Op != OpARM64SLLconst {
			break
		}
		if v_0.AuxInt != 32-c {
			break
		}
		x := v_0.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVWUreg {
			break
		}
		if x != v_1.Args[0] {
			break
		}
		if !(c < 32 && t.Size() == 4) {
			break
		}
		v.reset(OpARM64RORWconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueARM64_OpAdd16_0(v *Value) bool {
	// match: (Add16 x y)
	// cond:
	// result: (ADD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAdd32_0(v *Value) bool {
	// match: (Add32 x y)
	// cond:
	// result: (ADD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAdd32F_0(v *Value) bool {
	// match: (Add32F x y)
	// cond:
	// result: (FADDS x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FADDS)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAdd64_0(v *Value) bool {
	// match: (Add64 x y)
	// cond:
	// result: (ADD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAdd64F_0(v *Value) bool {
	// match: (Add64F x y)
	// cond:
	// result: (FADDD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FADDD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAdd8_0(v *Value) bool {
	// match: (Add8 x y)
	// cond:
	// result: (ADD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAddPtr_0(v *Value) bool {
	// match: (AddPtr x y)
	// cond:
	// result: (ADD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAddr_0(v *Value) bool {
	// match: (Addr {sym} base)
	// cond:
	// result: (MOVDaddr {sym} base)
	for {
		sym := v.Aux
		base := v.Args[0]
		v.reset(OpARM64MOVDaddr)
		v.Aux = sym
		v.AddArg(base)
		return true
	}
}
func rewriteValueARM64_OpAnd16_0(v *Value) bool {
	// match: (And16 x y)
	// cond:
	// result: (AND x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64AND)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAnd32_0(v *Value) bool {
	// match: (And32 x y)
	// cond:
	// result: (AND x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64AND)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAnd64_0(v *Value) bool {
	// match: (And64 x y)
	// cond:
	// result: (AND x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64AND)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAnd8_0(v *Value) bool {
	// match: (And8 x y)
	// cond:
	// result: (AND x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64AND)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAndB_0(v *Value) bool {
	// match: (AndB x y)
	// cond:
	// result: (AND x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64AND)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpAtomicAdd32_0(v *Value) bool {
	// match: (AtomicAdd32 ptr val mem)
	// cond:
	// result: (LoweredAtomicAdd32 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicAdd32)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicAdd64_0(v *Value) bool {
	// match: (AtomicAdd64 ptr val mem)
	// cond:
	// result: (LoweredAtomicAdd64 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicAdd64)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicAnd8_0(v *Value) bool {
	// match: (AtomicAnd8 ptr val mem)
	// cond:
	// result: (LoweredAtomicAnd8 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicAnd8)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicCompareAndSwap32_0(v *Value) bool {
	// match: (AtomicCompareAndSwap32 ptr old new_ mem)
	// cond:
	// result: (LoweredAtomicCas32 ptr old new_ mem)
	for {
		ptr := v.Args[0]
		old := v.Args[1]
		new_ := v.Args[2]
		mem := v.Args[3]
		v.reset(OpARM64LoweredAtomicCas32)
		v.AddArg(ptr)
		v.AddArg(old)
		v.AddArg(new_)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicCompareAndSwap64_0(v *Value) bool {
	// match: (AtomicCompareAndSwap64 ptr old new_ mem)
	// cond:
	// result: (LoweredAtomicCas64 ptr old new_ mem)
	for {
		ptr := v.Args[0]
		old := v.Args[1]
		new_ := v.Args[2]
		mem := v.Args[3]
		v.reset(OpARM64LoweredAtomicCas64)
		v.AddArg(ptr)
		v.AddArg(old)
		v.AddArg(new_)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicExchange32_0(v *Value) bool {
	// match: (AtomicExchange32 ptr val mem)
	// cond:
	// result: (LoweredAtomicExchange32 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicExchange32)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicExchange64_0(v *Value) bool {
	// match: (AtomicExchange64 ptr val mem)
	// cond:
	// result: (LoweredAtomicExchange64 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicExchange64)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicLoad32_0(v *Value) bool {
	// match: (AtomicLoad32 ptr mem)
	// cond:
	// result: (LDARW ptr mem)
	for {
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64LDARW)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicLoad64_0(v *Value) bool {
	// match: (AtomicLoad64 ptr mem)
	// cond:
	// result: (LDAR ptr mem)
	for {
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64LDAR)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicLoadPtr_0(v *Value) bool {
	// match: (AtomicLoadPtr ptr mem)
	// cond:
	// result: (LDAR ptr mem)
	for {
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64LDAR)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicOr8_0(v *Value) bool {
	// match: (AtomicOr8 ptr val mem)
	// cond:
	// result: (LoweredAtomicOr8 ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64LoweredAtomicOr8)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicStore32_0(v *Value) bool {
	// match: (AtomicStore32 ptr val mem)
	// cond:
	// result: (STLRW ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64STLRW)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicStore64_0(v *Value) bool {
	// match: (AtomicStore64 ptr val mem)
	// cond:
	// result: (STLR ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64STLR)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAtomicStorePtrNoWB_0(v *Value) bool {
	// match: (AtomicStorePtrNoWB ptr val mem)
	// cond:
	// result: (STLR ptr val mem)
	for {
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64STLR)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpAvg64u_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Avg64u <t> x y)
	// cond:
	// result: (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64ADD)
		v0 := b.NewValue0(v.Pos, OpARM64SRLconst, t)
		v0.AuxInt = 1
		v1 := b.NewValue0(v.Pos, OpARM64SUB, t)
		v1.AddArg(x)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpBitLen64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (BitLen64 x)
	// cond:
	// result: (SUB (MOVDconst [64]) (CLZ <types.Int> x))
	for {
		x := v.Args[0]
		v.reset(OpARM64SUB)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 64
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CLZ, types.Int)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpBitRev16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (BitRev16 x)
	// cond:
	// result: (SRLconst [48] (RBIT <types.UInt64> x))
	for {
		x := v.Args[0]
		v.reset(OpARM64SRLconst)
		v.AuxInt = 48
		v0 := b.NewValue0(v.Pos, OpARM64RBIT, types.UInt64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpBitRev32_0(v *Value) bool {
	// match: (BitRev32 x)
	// cond:
	// result: (RBITW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64RBITW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpBitRev64_0(v *Value) bool {
	// match: (BitRev64 x)
	// cond:
	// result: (RBIT x)
	for {
		x := v.Args[0]
		v.reset(OpARM64RBIT)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpBitRev8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (BitRev8 x)
	// cond:
	// result: (SRLconst [56] (RBIT <types.UInt64> x))
	for {
		x := v.Args[0]
		v.reset(OpARM64SRLconst)
		v.AuxInt = 56
		v0 := b.NewValue0(v.Pos, OpARM64RBIT, types.UInt64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpBswap32_0(v *Value) bool {
	// match: (Bswap32 x)
	// cond:
	// result: (REVW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64REVW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpBswap64_0(v *Value) bool {
	// match: (Bswap64 x)
	// cond:
	// result: (REV x)
	for {
		x := v.Args[0]
		v.reset(OpARM64REV)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpClosureCall_0(v *Value) bool {
	// match: (ClosureCall [argwid] entry closure mem)
	// cond:
	// result: (CALLclosure [argwid] entry closure mem)
	for {
		argwid := v.AuxInt
		entry := v.Args[0]
		closure := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64CALLclosure)
		v.AuxInt = argwid
		v.AddArg(entry)
		v.AddArg(closure)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpCom16_0(v *Value) bool {
	// match: (Com16 x)
	// cond:
	// result: (MVN x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MVN)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCom32_0(v *Value) bool {
	// match: (Com32 x)
	// cond:
	// result: (MVN x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MVN)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCom64_0(v *Value) bool {
	// match: (Com64 x)
	// cond:
	// result: (MVN x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MVN)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCom8_0(v *Value) bool {
	// match: (Com8 x)
	// cond:
	// result: (MVN x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MVN)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpConst16_0(v *Value) bool {
	// match: (Const16 [val])
	// cond:
	// result: (MOVDconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConst32_0(v *Value) bool {
	// match: (Const32 [val])
	// cond:
	// result: (MOVDconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConst32F_0(v *Value) bool {
	// match: (Const32F [val])
	// cond:
	// result: (FMOVSconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64FMOVSconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConst64_0(v *Value) bool {
	// match: (Const64 [val])
	// cond:
	// result: (MOVDconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConst64F_0(v *Value) bool {
	// match: (Const64F [val])
	// cond:
	// result: (FMOVDconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64FMOVDconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConst8_0(v *Value) bool {
	// match: (Const8 [val])
	// cond:
	// result: (MOVDconst [val])
	for {
		val := v.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = val
		return true
	}
}
func rewriteValueARM64_OpConstBool_0(v *Value) bool {
	// match: (ConstBool [b])
	// cond:
	// result: (MOVDconst [b])
	for {
		b := v.AuxInt
		v.reset(OpARM64MOVDconst)
		v.AuxInt = b
		return true
	}
}
func rewriteValueARM64_OpConstNil_0(v *Value) bool {
	// match: (ConstNil)
	// cond:
	// result: (MOVDconst [0])
	for {
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
}
func rewriteValueARM64_OpConvert_0(v *Value) bool {
	// match: (Convert x mem)
	// cond:
	// result: (MOVDconvert x mem)
	for {
		x := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVDconvert)
		v.AddArg(x)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpCtz32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Ctz32 <t> x)
	// cond:
	// result: (CLZW (RBITW <t> x))
	for {
		t := v.Type
		x := v.Args[0]
		v.reset(OpARM64CLZW)
		v0 := b.NewValue0(v.Pos, OpARM64RBITW, t)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpCtz64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Ctz64 <t> x)
	// cond:
	// result: (CLZ (RBIT <t> x))
	for {
		t := v.Type
		x := v.Args[0]
		v.reset(OpARM64CLZ)
		v0 := b.NewValue0(v.Pos, OpARM64RBIT, t)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpCvt32Fto32_0(v *Value) bool {
	// match: (Cvt32Fto32 x)
	// cond:
	// result: (FCVTZSSW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZSSW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Fto32U_0(v *Value) bool {
	// match: (Cvt32Fto32U x)
	// cond:
	// result: (FCVTZUSW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZUSW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Fto64_0(v *Value) bool {
	// match: (Cvt32Fto64 x)
	// cond:
	// result: (FCVTZSS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZSS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Fto64F_0(v *Value) bool {
	// match: (Cvt32Fto64F x)
	// cond:
	// result: (FCVTSD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTSD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Fto64U_0(v *Value) bool {
	// match: (Cvt32Fto64U x)
	// cond:
	// result: (FCVTZUS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZUS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Uto32F_0(v *Value) bool {
	// match: (Cvt32Uto32F x)
	// cond:
	// result: (UCVTFWS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64UCVTFWS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32Uto64F_0(v *Value) bool {
	// match: (Cvt32Uto64F x)
	// cond:
	// result: (UCVTFWD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64UCVTFWD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32to32F_0(v *Value) bool {
	// match: (Cvt32to32F x)
	// cond:
	// result: (SCVTFWS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64SCVTFWS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt32to64F_0(v *Value) bool {
	// match: (Cvt32to64F x)
	// cond:
	// result: (SCVTFWD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64SCVTFWD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Fto32_0(v *Value) bool {
	// match: (Cvt64Fto32 x)
	// cond:
	// result: (FCVTZSDW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZSDW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Fto32F_0(v *Value) bool {
	// match: (Cvt64Fto32F x)
	// cond:
	// result: (FCVTDS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTDS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Fto32U_0(v *Value) bool {
	// match: (Cvt64Fto32U x)
	// cond:
	// result: (FCVTZUDW x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZUDW)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Fto64_0(v *Value) bool {
	// match: (Cvt64Fto64 x)
	// cond:
	// result: (FCVTZSD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZSD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Fto64U_0(v *Value) bool {
	// match: (Cvt64Fto64U x)
	// cond:
	// result: (FCVTZUD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FCVTZUD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Uto32F_0(v *Value) bool {
	// match: (Cvt64Uto32F x)
	// cond:
	// result: (UCVTFS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64UCVTFS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64Uto64F_0(v *Value) bool {
	// match: (Cvt64Uto64F x)
	// cond:
	// result: (UCVTFD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64UCVTFD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64to32F_0(v *Value) bool {
	// match: (Cvt64to32F x)
	// cond:
	// result: (SCVTFS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64SCVTFS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpCvt64to64F_0(v *Value) bool {
	// match: (Cvt64to64F x)
	// cond:
	// result: (SCVTFD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64SCVTFD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpDiv16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Div16 x y)
	// cond:
	// result: (DIVW (SignExt16to32 x) (SignExt16to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64DIVW)
		v0 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpDiv16u_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Div16u x y)
	// cond:
	// result: (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UDIVW)
		v0 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpDiv32_0(v *Value) bool {
	// match: (Div32 x y)
	// cond:
	// result: (DIVW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64DIVW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv32F_0(v *Value) bool {
	// match: (Div32F x y)
	// cond:
	// result: (FDIVS x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FDIVS)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv32u_0(v *Value) bool {
	// match: (Div32u x y)
	// cond:
	// result: (UDIVW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UDIVW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv64_0(v *Value) bool {
	// match: (Div64 x y)
	// cond:
	// result: (DIV x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64DIV)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv64F_0(v *Value) bool {
	// match: (Div64F x y)
	// cond:
	// result: (FDIVD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FDIVD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv64u_0(v *Value) bool {
	// match: (Div64u x y)
	// cond:
	// result: (UDIV x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UDIV)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpDiv8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Div8 x y)
	// cond:
	// result: (DIVW (SignExt8to32 x) (SignExt8to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64DIVW)
		v0 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpDiv8u_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Div8u x y)
	// cond:
	// result: (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UDIVW)
		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpEq16_0(v *Value) bool {
|
2016-07-21 12:42:49 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-17 16:04:46 -07:00
|
|
|
types := &b.Func.Config.Types
|
|
|
|
|
_ = types
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Eq16 x y)
|
2016-07-21 12:42:49 -04:00
|
|
|
// cond:
|
2016-08-03 09:56:36 -04:00
|
|
|
// result: (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
|
2016-07-21 12:42:49 -04:00
|
|
|
for {
|
2016-08-03 09:56:36 -04:00
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64Equal)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
|
2017-03-17 16:04:46 -07:00
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
|
2016-08-03 09:56:36 -04:00
|
|
|
v1.AddArg(x)
|
|
|
|
|
v0.AddArg(v1)
|
2017-03-17 16:04:46 -07:00
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v0.AddArg(v2)
|
|
|
|
|
v.AddArg(v0)
|
2016-07-21 12:42:49 -04:00
|
|
|
return true
|
|
|
|
|
}
|
2016-08-03 09:56:36 -04:00
|
|
|
}
func rewriteValueARM64_OpEq32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Eq32 x y)
	// cond:
	// result: (Equal (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpEq32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Eq32F x y)
	// cond:
	// result: (Equal (FCMPS x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpEq64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Eq64 x y)
	// cond:
	// result: (Equal (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpEq64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Eq64F x y)
	// cond:
	// result: (Equal (FCMPD x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpEq8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Eq8 x y)
	// cond:
	// result: (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpEqB_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (EqB x y)
	// cond:
	// result: (XOR (MOVDconst [1]) (XOR <types.Bool> x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 1
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64XOR, types.Bool)
		v1.AddArg(x)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
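// EqB computes boolean equality without a flags register: the inner XOR is
// zero exactly when x and y agree, and XORing that with the constant 1
// flips the low bit, yielding 1 for equal and 0 for unequal.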
func rewriteValueARM64_OpEqPtr_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (EqPtr x y)
	// cond:
	// result: (Equal (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64Equal)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Geq16 x y)
	// cond:
	// result: (GreaterEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq16U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Geq16U x y)
	// cond:
	// result: (GreaterEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq32 x y)
	// cond:
	// result: (GreaterEqual (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq32F x y)
	// cond:
	// result: (GreaterEqual (FCMPS x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq32U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq32U x y)
	// cond:
	// result: (GreaterEqualU (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq64 x y)
	// cond:
	// result: (GreaterEqual (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq64F x y)
	// cond:
	// result: (GreaterEqual (FCMPD x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq64U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Geq64U x y)
	// cond:
	// result: (GreaterEqualU (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Geq8 x y)
	// cond:
	// result: (GreaterEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGeq8U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Geq8U x y)
	// cond:
	// result: (GreaterEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGetClosurePtr_0(v *Value) bool {
	// match: (GetClosurePtr)
	// cond:
	// result: (LoweredGetClosurePtr)
	for {
		v.reset(OpARM64LoweredGetClosurePtr)
		return true
	}
}
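// LoweredGetClosurePtr is a pseudo-op that just names the closure context
// register so later phases keep it live at function entry; it emits no
// arithmetic of its own.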
func rewriteValueARM64_OpGreater16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Greater16 x y)
	// cond:
	// result: (GreaterThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater16U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Greater16U x y)
	// cond:
	// result: (GreaterThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater32 x y)
	// cond:
	// result: (GreaterThan (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater32F x y)
	// cond:
	// result: (GreaterThan (FCMPS x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater32U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater32U x y)
	// cond:
	// result: (GreaterThanU (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater64 x y)
	// cond:
	// result: (GreaterThan (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater64F x y)
	// cond:
	// result: (GreaterThan (FCMPD x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater64U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Greater64U x y)
	// cond:
	// result: (GreaterThanU (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Greater8 x y)
	// cond:
	// result: (GreaterThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpGreater8U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Greater8U x y)
	// cond:
	// result: (GreaterThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpHmul32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Hmul32 x y)
	// cond:
	// result: (SRAconst (MULL <types.Int64> x y) [32])
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRAconst)
		v.AuxInt = 32
		v0 := b.NewValue0(v.Pos, OpARM64MULL, types.Int64)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
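// Hmul32 wants the high 32 bits of the 32x32->64 product: MULL forms the
// full 64-bit product and SRAconst [32] brings the upper half down.
// Hmul32u below pairs UMULL with the same shift; since only the low 32 bits
// of the shifted value are consumed, arithmetic vs. logical shift is moot.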
func rewriteValueARM64_OpHmul32u_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Hmul32u x y)
	// cond:
	// result: (SRAconst (UMULL <types.UInt64> x y) [32])
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRAconst)
		v.AuxInt = 32
		v0 := b.NewValue0(v.Pos, OpARM64UMULL, types.UInt64)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpHmul64_0(v *Value) bool {
	// match: (Hmul64 x y)
	// cond:
	// result: (MULH x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MULH)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpHmul64u_0(v *Value) bool {
	// match: (Hmul64u x y)
	// cond:
	// result: (UMULH x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UMULH)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpInterCall_0(v *Value) bool {
	// match: (InterCall [argwid] entry mem)
	// cond:
	// result: (CALLinter [argwid] entry mem)
	for {
		argwid := v.AuxInt
		entry := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64CALLinter)
		v.AuxInt = argwid
		v.AddArg(entry)
		v.AddArg(mem)
		return true
	}
}
func rewriteValueARM64_OpIsInBounds_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (IsInBounds idx len)
	// cond:
	// result: (LessThanU (CMP idx len))
	for {
		idx := v.Args[0]
		len := v.Args[1]
		v.reset(OpARM64LessThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(idx)
		v0.AddArg(len)
		v.AddArg(v0)
		return true
	}
}
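// The bounds check is deliberately unsigned: a negative idx wraps to a huge
// unsigned value, so the single LessThanU test covers both 0 <= idx and
// idx < len. IsSliceInBounds below is the same trick with <=.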
func rewriteValueARM64_OpIsNonNil_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (IsNonNil ptr)
	// cond:
	// result: (NotEqual (CMPconst [0] ptr))
	for {
		ptr := v.Args[0]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v0.AuxInt = 0
		v0.AddArg(ptr)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpIsSliceInBounds_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (IsSliceInBounds idx len)
	// cond:
	// result: (LessEqualU (CMP idx len))
	for {
		idx := v.Args[0]
		len := v.Args[1]
		v.reset(OpARM64LessEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(idx)
		v0.AddArg(len)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Leq16 x y)
	// cond:
	// result: (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq16U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Leq16U x y)
	// cond:
	// result: (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq32 x y)
	// cond:
	// result: (LessEqual (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq32F x y)
	// cond:
	// result: (GreaterEqual (FCMPS y x))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(y)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
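// There are no LessEqual/LessThan float rules here: x <= y is rewritten as
// y >= x (and x < y as y > x further down), swapping the FCMPS/FCMPD
// operands so only the GreaterEqual/GreaterThan condition tests are needed.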
func rewriteValueARM64_OpLeq32U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq32U x y)
	// cond:
	// result: (LessEqualU (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq64 x y)
	// cond:
	// result: (LessEqual (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq64F x y)
	// cond:
	// result: (GreaterEqual (FCMPD y x))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(y)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq64U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Leq64U x y)
	// cond:
	// result: (LessEqualU (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Leq8 x y)
	// cond:
	// result: (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLeq8U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Leq8U x y)
	// cond:
	// result: (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessEqualU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Less16 x y)
	// cond:
	// result: (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess16U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Less16U x y)
	// cond:
	// result: (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less32 x y)
	// cond:
	// result: (LessThan (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less32F x y)
	// cond:
	// result: (GreaterThan (FCMPS y x))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(y)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess32U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less32U x y)
	// cond:
	// result: (LessThanU (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less64 x y)
	// cond:
	// result: (LessThan (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less64F x y)
	// cond:
	// result: (GreaterThan (FCMPD y x))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64GreaterThan)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(y)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess64U_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Less64U x y)
	// cond:
	// result: (LessThanU (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Less8 x y)
	// cond:
	// result: (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThan)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLess8U_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Less8U x y)
	// cond:
	// result: (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64LessThanU)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpLoad_0(v *Value) bool {
	// match: (Load <t> ptr mem)
	// cond: t.IsBoolean()
	// result: (MOVBUload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(t.IsBoolean()) {
			break
		}
		v.reset(OpARM64MOVBUload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is8BitInt(t) && isSigned(t))
	// result: (MOVBload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is8BitInt(t) && isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVBload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is8BitInt(t) && !isSigned(t))
	// result: (MOVBUload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is8BitInt(t) && !isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVBUload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is16BitInt(t) && isSigned(t))
	// result: (MOVHload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is16BitInt(t) && isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVHload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is16BitInt(t) && !isSigned(t))
	// result: (MOVHUload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is16BitInt(t) && !isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVHUload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is32BitInt(t) && isSigned(t))
	// result: (MOVWload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is32BitInt(t) && isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVWload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is32BitInt(t) && !isSigned(t))
	// result: (MOVWUload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is32BitInt(t) && !isSigned(t)) {
			break
		}
		v.reset(OpARM64MOVWUload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: (is64BitInt(t) || isPtr(t))
	// result: (MOVDload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is64BitInt(t) || isPtr(t)) {
			break
		}
		v.reset(OpARM64MOVDload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: is32BitFloat(t)
	// result: (FMOVSload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is32BitFloat(t)) {
			break
		}
		v.reset(OpARM64FMOVSload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Load <t> ptr mem)
	// cond: is64BitFloat(t)
	// result: (FMOVDload ptr mem)
	for {
		t := v.Type
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(is64BitFloat(t)) {
			break
		}
		v.reset(OpARM64FMOVDload)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	return false
}
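// Load dispatches purely on the result type: width selects B/H/W/D,
// signedness selects the sign-extending (MOVB, MOVH, MOVW) versus
// zero-extending (MOVBU, MOVHU, MOVWU) forms, floats go to FMOVS/FMOVD,
// and anything unmatched falls through to return false.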
func rewriteValueARM64_OpLsh16x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh16x16 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
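// Go requires a left shift to produce 0 once the count reaches the operand
// width, but the ARM64 variable shift uses only the low six bits of the
// count. Counts of 16..63 already clear the low 16 bits naturally, so the
// CSELULT only has to force 0 when the (zero-extended) count is >= 64,
// using CMPconst [64] for the flags.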
func rewriteValueARM64_OpLsh16x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh16x32 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh16x64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Lsh16x64 x (MOVDconst [c]))
	// cond: uint64(c) < 16
	// result: (SLLconst x [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 16) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (Lsh16x64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 16
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 16) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Lsh16x64 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpConst64, t)
		v1.AuxInt = 0
		v.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v2.AuxInt = 64
		v2.AddArg(y)
		v.AddArg(v2)
		return true
	}
}
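// With a constant count the CSELULT selection folds away at rewrite time:
// counts below the operand width collapse to a single SLLconst, and counts
// at or above it fold straight to MOVDconst [0].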
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpLsh16x8_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-17 16:04:46 -07:00
|
|
|
types := &b.Func.Config.Types
|
|
|
|
|
_ = types
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (Lsh16x8 <t> x y)
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond:
|
|
|
|
|
// result: (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64CSELULT)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v0.AddArg(x)
|
2017-03-17 16:04:46 -07:00
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v1.AddArg(y)
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
2016-12-07 18:14:35 -08:00
|
|
|
v2 := b.NewValue0(v.Pos, OpConst64, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AuxInt = 0
|
|
|
|
|
v.AddArg(v2)
|
2016-12-07 18:14:35 -08:00
|
|
|
v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-03 09:56:36 -04:00
|
|
|
v3.AuxInt = 64
|
2017-03-17 16:04:46 -07:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v.AddArg(v3)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpLsh32x16_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-17 16:04:46 -07:00
|
|
|
types := &b.Func.Config.Types
|
|
|
|
|
_ = types
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Lsh32x16 <t> x y)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64CSELULT)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v0.AddArg(x)
|
2017-03-17 16:04:46 -07:00
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v1.AddArg(y)
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
2016-12-07 18:14:35 -08:00
|
|
|
v2 := b.NewValue0(v.Pos, OpConst64, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AuxInt = 0
|
|
|
|
|
v.AddArg(v2)
|
2016-12-07 18:14:35 -08:00
|
|
|
v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-03 09:56:36 -04:00
|
|
|
v3.AuxInt = 64
|
2017-03-17 16:04:46 -07:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v.AddArg(v3)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpLsh32x32_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-17 16:04:46 -07:00
|
|
|
types := &b.Func.Config.Types
|
|
|
|
|
_ = types
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Lsh32x32 <t> x y)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64CSELULT)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v0.AddArg(x)
|
2017-03-17 16:04:46 -07:00
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v1.AddArg(y)
|
|
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v.AddArg(v0)
|
2016-12-07 18:14:35 -08:00
|
|
|
v2 := b.NewValue0(v.Pos, OpConst64, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AuxInt = 0
|
|
|
|
|
v.AddArg(v2)
|
2016-12-07 18:14:35 -08:00
|
|
|
v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-03 09:56:36 -04:00
|
|
|
v3.AuxInt = 64
|
2017-03-17 16:04:46 -07:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v.AddArg(v3)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpLsh32x64_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (Lsh32x64 x (MOVDconst [c]))
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: uint64(c) < 32
|
|
|
|
|
// result: (SLLconst x [c])
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
if !(uint64(c) < 32) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpARM64SLLconst)
|
|
|
|
|
v.AuxInt = c
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2017-03-30 03:30:22 +00:00
|
|
|
// match: (Lsh32x64 _ (MOVDconst [c]))
|
2016-08-03 09:56:36 -04:00
|
|
|
// cond: uint64(c) >= 32
|
|
|
|
|
// result: (MOVDconst [0])
|
|
|
|
|
for {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
if v_1.Op != OpARM64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
c := v_1.AuxInt
|
|
|
|
|
if !(uint64(c) >= 32) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpARM64MOVDconst)
|
|
|
|
|
v.AuxInt = 0
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Lsh32x64 <t> x y)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64CSELULT)
|
2016-12-07 18:14:35 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v0.AddArg(x)
|
|
|
|
|
v0.AddArg(y)
|
|
|
|
|
v.AddArg(v0)
|
2016-12-07 18:14:35 -08:00
|
|
|
v1 := b.NewValue0(v.Pos, OpConst64, t)
|
2016-08-03 09:56:36 -04:00
|
|
|
v1.AuxInt = 0
|
|
|
|
|
v.AddArg(v1)
|
2016-12-07 18:14:35 -08:00
|
|
|
v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AuxInt = 64
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v.AddArg(v2)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
func rewriteValueARM64_OpLsh32x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh32x8 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh64x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh64x16 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh64x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh64x32 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
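// For reference, each "// match / cond / result" triple below corresponds to
// one rule from gen/ARM64.rules; e.g. the first Lsh64x64 rule reads: a
// 64-bit left shift by a constant c with uint64(c) < 64 becomes a single
// SLLconst instruction.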
func rewriteValueARM64_OpLsh64x64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Lsh64x64 x (MOVDconst [c]))
	// cond: uint64(c) < 64
	// result: (SLLconst x [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 64) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (Lsh64x64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 64
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 64) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Lsh64x64 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpConst64, t)
		v1.AuxInt = 0
		v.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v2.AuxInt = 64
		v2.AddArg(y)
		v.AddArg(v2)
		return true
	}
}
func rewriteValueARM64_OpLsh64x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh64x8 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh8x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh8x16 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh8x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh8x32 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpLsh8x64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Lsh8x64 x (MOVDconst [c]))
	// cond: uint64(c) < 8
	// result: (SLLconst x [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 8) {
			break
		}
		v.reset(OpARM64SLLconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (Lsh8x64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 8
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 8) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Lsh8x64 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpConst64, t)
		v1.AuxInt = 0
		v.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v2.AuxInt = 64
		v2.AddArg(y)
		v.AddArg(v2)
		return true
	}
}
func rewriteValueARM64_OpLsh8x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Lsh8x8 <t> x y)
	// cond:
	// result: (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
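// The Mod rules below have no 16- or 8-bit remainder instruction to target:
// narrow operands are first sign- or zero-extended to 32 bits and divided
// with MODW/UMODW; 64-bit operands use MOD/UMOD directly.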
func rewriteValueARM64_OpMod16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Mod16 x y)
	// cond:
	// result: (MODW (SignExt16to32 x) (SignExt16to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MODW)
		v0 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpSignExt16to32, types.Int32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpMod16u_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Mod16u x y)
	// cond:
	// result: (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UMODW)
		v0 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpMod32_0(v *Value) bool {
	// match: (Mod32 x y)
	// cond:
	// result: (MODW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MODW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMod32u_0(v *Value) bool {
	// match: (Mod32u x y)
	// cond:
	// result: (UMODW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UMODW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMod64_0(v *Value) bool {
	// match: (Mod64 x y)
	// cond:
	// result: (MOD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MOD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMod64u_0(v *Value) bool {
	// match: (Mod64u x y)
	// cond:
	// result: (UMOD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UMOD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMod8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Mod8 x y)
	// cond:
	// result: (MODW (SignExt8to32 x) (SignExt8to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MODW)
		v0 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpSignExt8to32, types.Int32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpMod8u_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Mod8u x y)
	// cond:
	// result: (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64UMODW)
		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(y)
		v.AddArg(v1)
		return true
	}
}
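// rewriteValueARM64_OpMove_0 handles small fixed sizes: moves of 0-8, 12 and
// the odd sizes 3, 5, 6, 7 bytes become a short sequence of load/store
// pairs; everything larger falls through to OpMove_10.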
func rewriteValueARM64_OpMove_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Move [0] _ _ mem)
	// cond:
	// result: mem
	for {
		if v.AuxInt != 0 {
			break
		}
		mem := v.Args[2]
		v.reset(OpCopy)
		v.Type = mem.Type
		v.AddArg(mem)
		return true
	}
	// match: (Move [1] dst src mem)
	// cond:
	// result: (MOVBstore dst (MOVBUload src mem) mem)
	for {
		if v.AuxInt != 1 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVBUload, types.UInt8)
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Move [2] dst src mem)
	// cond:
	// result: (MOVHstore dst (MOVHUload src mem) mem)
	for {
		if v.AuxInt != 2 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, types.UInt16)
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Move [4] dst src mem)
	// cond:
	// result: (MOVWstore dst (MOVWUload src mem) mem)
	for {
		if v.AuxInt != 4 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVWstore)
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, types.UInt32)
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Move [8] dst src mem)
	// cond:
	// result: (MOVDstore dst (MOVDload src mem) mem)
	for {
		if v.AuxInt != 8 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVDstore)
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Move [3] dst src mem)
	// cond:
	// result: (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVHstore dst (MOVHUload src mem) mem))
	for {
		if v.AuxInt != 3 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 2
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVBUload, types.UInt8)
		v0.AuxInt = 2
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, TypeMem)
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVHUload, types.UInt16)
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Move [5] dst src mem)
	// cond:
	// result: (MOVBstore [4] dst (MOVBUload [4] src mem) (MOVWstore dst (MOVWUload src mem) mem))
	for {
		if v.AuxInt != 5 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 4
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVBUload, types.UInt8)
		v0.AuxInt = 4
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVWUload, types.UInt32)
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Move [6] dst src mem)
	// cond:
	// result: (MOVHstore [4] dst (MOVHUload [4] src mem) (MOVWstore dst (MOVWUload src mem) mem))
	for {
		if v.AuxInt != 6 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = 4
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, types.UInt16)
		v0.AuxInt = 4
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVWUload, types.UInt32)
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Move [7] dst src mem)
	// cond:
	// result: (MOVBstore [6] dst (MOVBUload [6] src mem) (MOVHstore [4] dst (MOVHUload [4] src mem) (MOVWstore dst (MOVWUload src mem) mem)))
	for {
		if v.AuxInt != 7 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 6
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVBUload, types.UInt8)
		v0.AuxInt = 6
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, TypeMem)
		v1.AuxInt = 4
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVHUload, types.UInt16)
		v2.AuxInt = 4
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v3.AddArg(dst)
		v4 := b.NewValue0(v.Pos, OpARM64MOVWUload, types.UInt32)
		v4.AddArg(src)
		v4.AddArg(mem)
		v3.AddArg(v4)
		v3.AddArg(mem)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
	// match: (Move [12] dst src mem)
	// cond:
	// result: (MOVWstore [8] dst (MOVWUload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))
	for {
		if v.AuxInt != 12 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVWstore)
		v.AuxInt = 8
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, types.UInt32)
		v0.AuxInt = 8
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	return false
}
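// OpMove_10 continues with 16- and 24-byte moves as MOVDstore pairs, splits
// sizes that are not a multiple of 8 into a small tail move plus an aligned
// move, and sends large multiples of 8 to DUFFCOPY (up to 8*128 bytes,
// unless config.noDuffDevice) or to the generic LoweredMove loop.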
func rewriteValueARM64_OpMove_10(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	types := &b.Func.Config.Types
	_ = types
	// match: (Move [16] dst src mem)
	// cond:
	// result: (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))
	for {
		if v.AuxInt != 16 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVDstore)
		v.AuxInt = 8
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v0.AuxInt = 8
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Move [24] dst src mem)
	// cond:
	// result: (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)))
	for {
		if v.AuxInt != 24 {
			break
		}
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		v.reset(OpARM64MOVDstore)
		v.AuxInt = 16
		v.AddArg(dst)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v0.AuxInt = 16
		v0.AddArg(src)
		v0.AddArg(mem)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AuxInt = 8
		v1.AddArg(dst)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v2.AuxInt = 8
		v2.AddArg(src)
		v2.AddArg(mem)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v3.AddArg(dst)
		v4 := b.NewValue0(v.Pos, OpARM64MOVDload, types.UInt64)
		v4.AddArg(src)
		v4.AddArg(mem)
		v3.AddArg(v4)
		v3.AddArg(mem)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
	// match: (Move [s] dst src mem)
	// cond: s%8 != 0 && s > 8
	// result: (Move [s%8] (OffPtr <dst.Type> dst [s-s%8]) (OffPtr <src.Type> src [s-s%8]) (Move [s-s%8] dst src mem))
	for {
		s := v.AuxInt
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		if !(s%8 != 0 && s > 8) {
			break
		}
		v.reset(OpMove)
		v.AuxInt = s % 8
		v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
		v0.AuxInt = s - s%8
		v0.AddArg(dst)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
		v1.AuxInt = s - s%8
		v1.AddArg(src)
		v.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpMove, TypeMem)
		v2.AuxInt = s - s%8
		v2.AddArg(dst)
		v2.AddArg(src)
		v2.AddArg(mem)
		v.AddArg(v2)
		return true
	}
	// match: (Move [s] dst src mem)
	// cond: s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice
	// result: (DUFFCOPY [8 * (128 - int64(s/8))] dst src mem)
	for {
		s := v.AuxInt
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		if !(s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice) {
			break
		}
		v.reset(OpARM64DUFFCOPY)
		v.AuxInt = 8 * (128 - int64(s/8))
		v.AddArg(dst)
		v.AddArg(src)
		v.AddArg(mem)
		return true
	}
	// match: (Move [s] dst src mem)
	// cond: s > 24 && s%8 == 0
	// result: (LoweredMove dst src (ADDconst <src.Type> src [s-8]) mem)
	for {
		s := v.AuxInt
		dst := v.Args[0]
		src := v.Args[1]
		mem := v.Args[2]
		if !(s > 24 && s%8 == 0) {
			break
		}
		v.reset(OpARM64LoweredMove)
		v.AddArg(dst)
		v.AddArg(src)
		v0 := b.NewValue0(v.Pos, OpARM64ADDconst, src.Type)
		v0.AuxInt = s - 8
		v0.AddArg(src)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	return false
}
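// Multiplication needs no width-specific handling below 64 bits: Mul8 and
// Mul16 reuse MULW because only the low bits of the product are observable.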
func rewriteValueARM64_OpMul16_0(v *Value) bool {
	// match: (Mul16 x y)
	// cond:
	// result: (MULW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MULW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMul32_0(v *Value) bool {
	// match: (Mul32 x y)
	// cond:
	// result: (MULW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MULW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMul32F_0(v *Value) bool {
	// match: (Mul32F x y)
	// cond:
	// result: (FMULS x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FMULS)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMul64_0(v *Value) bool {
	// match: (Mul64 x y)
	// cond:
	// result: (MUL x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MUL)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMul64F_0(v *Value) bool {
	// match: (Mul64F x y)
	// cond:
	// result: (FMULD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FMULD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpMul8_0(v *Value) bool {
	// match: (Mul8 x y)
	// cond:
	// result: (MULW x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64MULW)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpNeg16_0(v *Value) bool {
	// match: (Neg16 x)
	// cond:
	// result: (NEG x)
	for {
		x := v.Args[0]
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpNeg32_0(v *Value) bool {
	// match: (Neg32 x)
	// cond:
	// result: (NEG x)
	for {
		x := v.Args[0]
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpNeg32F_0(v *Value) bool {
	// match: (Neg32F x)
	// cond:
	// result: (FNEGS x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FNEGS)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpNeg64_0(v *Value) bool {
	// match: (Neg64 x)
	// cond:
	// result: (NEG x)
	for {
		x := v.Args[0]
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpNeg64F_0(v *Value) bool {
	// match: (Neg64F x)
	// cond:
	// result: (FNEGD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FNEGD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpNeg8_0(v *Value) bool {
	// match: (Neg8 x)
	// cond:
	// result: (NEG x)
	for {
		x := v.Args[0]
		v.reset(OpARM64NEG)
		v.AddArg(x)
		return true
	}
}
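// Inequality is computed by comparing and then materializing the condition:
// NotEqual reads the flags set by CMP/CMPW/FCMPS/FCMPD. Narrow integer
// operands are zero-extended first so stale high bits cannot affect the
// comparison.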
func rewriteValueARM64_OpNeq16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Neq16 x y)
	// cond:
	// result: (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeq32_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Neq32 x y)
	// cond:
	// result: (NotEqual (CMPW x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeq32F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Neq32F x y)
	// cond:
	// result: (NotEqual (FCMPS x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPS, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeq64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Neq64 x y)
	// cond:
	// result: (NotEqual (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeq64F_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Neq64F x y)
	// cond:
	// result: (NotEqual (FCMPD x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64FCMPD, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeq8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Neq8 x y)
	// cond:
	// result: (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMPW, TypeFlags)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to32, types.UInt32)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNeqB_0(v *Value) bool {
	// match: (NeqB x y)
	// cond:
	// result: (XOR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpNeqPtr_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (NeqPtr x y)
	// cond:
	// result: (NotEqual (CMP x y))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64NotEqual)
		v0 := b.NewValue0(v.Pos, OpARM64CMP, TypeFlags)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpNilCheck_0(v *Value) bool {
	// match: (NilCheck ptr mem)
	// cond:
	// result: (LoweredNilCheck ptr mem)
	for {
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64LoweredNilCheck)
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
}
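// Booleans are 0 or 1 here, so logical negation is XOR with the constant 1;
// e.g. Not(1) == 0 and Not(0) == 1.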
func rewriteValueARM64_OpNot_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Not x)
	// cond:
	// result: (XOR (MOVDconst [1]) x)
	for {
		x := v.Args[0]
		v.reset(OpARM64XOR)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 1
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
}
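// OffPtr is a pointer plus a constant offset. Off the stack pointer it
// becomes a MOVDaddr address computation; otherwise it is a plain ADDconst.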
func rewriteValueARM64_OpOffPtr_0(v *Value) bool {
	// match: (OffPtr [off] ptr:(SP))
	// cond:
	// result: (MOVDaddr [off] ptr)
	for {
		off := v.AuxInt
		ptr := v.Args[0]
		if ptr.Op != OpSP {
			break
		}
		v.reset(OpARM64MOVDaddr)
		v.AuxInt = off
		v.AddArg(ptr)
		return true
	}
	// match: (OffPtr [off] ptr)
	// cond:
	// result: (ADDconst [off] ptr)
	for {
		off := v.AuxInt
		ptr := v.Args[0]
		v.reset(OpARM64ADDconst)
		v.AuxInt = off
		v.AddArg(ptr)
		return true
	}
}
func rewriteValueARM64_OpOr16_0(v *Value) bool {
	// match: (Or16 x y)
	// cond:
	// result: (OR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64OR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpOr32_0(v *Value) bool {
	// match: (Or32 x y)
	// cond:
	// result: (OR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64OR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpOr64_0(v *Value) bool {
	// match: (Or64 x y)
	// cond:
	// result: (OR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64OR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpOr8_0(v *Value) bool {
	// match: (Or8 x y)
	// cond:
	// result: (OR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64OR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpOrB_0(v *Value) bool {
	// match: (OrB x y)
	// cond:
	// result: (OR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64OR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpRound32F_0(v *Value) bool {
	// match: (Round32F x)
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpRound64F_0(v *Value) bool {
	// match: (Round64F x)
	// cond:
	// result: x
	for {
		x := v.Args[0]
		v.reset(OpCopy)
		v.Type = x.Type
		v.AddArg(x)
		return true
	}
}
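// Unsigned right shifts of narrow values zero-extend the operand to 64 bits
// before shifting, then select 0 when the shift count is >= 64, mirroring
// the Lsh lowering above.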
func rewriteValueARM64_OpRsh16Ux16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16Ux16 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh16Ux32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16Ux32 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh16Ux64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16Ux64 x (MOVDconst [c]))
	// cond: uint64(c) < 16
	// result: (SRLconst (ZeroExt16to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 16) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh16Ux64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 16
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 16) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Rsh16Ux64 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(y)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpRsh16Ux8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16Ux8 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
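// Signed right shifts cannot simply produce 0 for large counts: the result
// must keep the sign, so the count itself is clamped to 63 with CSELULT and
// the shift is always performed with SRA; e.g. int16(-1) >> 100 stays -1.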
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpRsh16x16_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
b := v.Block
|
|
|
|
|
_ = b
|
2017-03-17 16:04:46 -07:00
|
|
|
types := &b.Func.Config.Types
|
|
|
|
|
_ = types
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Rsh16x16 x y)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
y := v.Args[1]
|
|
|
|
|
v.reset(OpARM64SRA)
|
2017-03-17 16:04:46 -07:00
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v0.AddArg(x)
|
|
|
|
|
v.AddArg(v0)
|
2016-12-07 18:14:35 -08:00
|
|
|
v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
|
2017-03-17 16:04:46 -07:00
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2016-12-07 18:14:35 -08:00
|
|
|
v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
|
2016-08-03 09:56:36 -04:00
|
|
|
v3.AuxInt = 63
|
|
|
|
|
v1.AddArg(v3)
|
2016-12-07 18:14:35 -08:00
|
|
|
v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
|
2016-08-03 09:56:36 -04:00
|
|
|
v4.AuxInt = 64
|
2017-03-17 16:04:46 -07:00
|
|
|
v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
|
2016-08-03 09:56:36 -04:00
|
|
|
v5.AddArg(y)
|
|
|
|
|
v4.AddArg(v5)
|
|
|
|
|
v1.AddArg(v4)
|
|
|
|
|
v.AddArg(v1)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
func rewriteValueARM64_OpRsh16x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16x32 x y)
	// cond:
	// result: (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh16x64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16x64 x (MOVDconst [c]))
	// cond: uint64(c) < 16
	// result: (SRAconst (SignExt16to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 16) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh16x64 x (MOVDconst [c]))
	// cond: uint64(c) >= 16
	// result: (SRAconst (SignExt16to64 x) [63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 16) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = 63
		v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh16x64 x y)
	// cond:
	// result: (SRA (SignExt16to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1.AddArg(y)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
}
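
// When the shift amount is a MOVDconst, Rsh16x64 above skips the CSELULT
// clamp entirely: an in-range constant becomes a single SRAconst, and an
// out-of-range constant folds to SRAconst [63], which materializes the
// replicated sign bit directly. The other Rsh*x64 lowerings below repeat
// the same constant fast path at their own widths.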
func rewriteValueARM64_OpRsh16x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh16x8 x y)
	// cond:
	// result: (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt16to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh32Ux16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32Ux16 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
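
// Unsigned right shifts take the complementary scheme: zero-extend the
// operand, SRL by the full 64-bit amount, and let CSELULT substitute a
// Const64 [0] whenever CMPconst [64] finds the amount is 64 or more. For a
// 32-bit operand an amount in [32,64) already shifts in only zero bits, so
// the explicit zero arm matters just for amounts >= 64.
//
// referenceRsh32U is a minimal Go sketch of those semantics; the name and
// signature are illustrative only, not part of the generated rules.
func referenceRsh32U(x uint32, y uint64) uint32 {
	if y >= 64 {
		return 0 // CSELULT selects the zero constant
	}
	return uint32(uint64(x) >> y)
}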
func rewriteValueARM64_OpRsh32Ux32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32Ux32 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh32Ux64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32Ux64 x (MOVDconst [c]))
	// cond: uint64(c) < 32
	// result: (SRLconst (ZeroExt32to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 32) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh32Ux64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 32
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 32) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Rsh32Ux64 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(y)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpRsh32Ux8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32Ux8 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh32x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32x16 x y)
	// cond:
	// result: (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh32x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32x32 x y)
	// cond:
	// result: (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh32x64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32x64 x (MOVDconst [c]))
	// cond: uint64(c) < 32
	// result: (SRAconst (SignExt32to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 32) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh32x64 x (MOVDconst [c]))
	// cond: uint64(c) >= 32
	// result: (SRAconst (SignExt32to64 x) [63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 32) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = 63
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh32x64 x y)
	// cond:
	// result: (SRA (SignExt32to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1.AddArg(y)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh32x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh32x8 x y)
	// cond:
	// result: (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt32to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh64Ux16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64Ux16 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
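
// For 64-bit operands (Rsh64Ux*) only the shift amount needs widening; x is
// already register-sized, so the SRL consumes it directly.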
func rewriteValueARM64_OpRsh64Ux32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64Ux32 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpRsh64Ux64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Rsh64Ux64 x (MOVDconst [c]))
	// cond: uint64(c) < 64
	// result: (SRLconst x [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 64) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (Rsh64Ux64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 64
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 64) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Rsh64Ux64 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v0.AddArg(x)
		v0.AddArg(y)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpConst64, t)
		v1.AuxInt = 0
		v.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v2.AuxInt = 64
		v2.AddArg(y)
		v.AddArg(v2)
		return true
	}
}
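
// Rsh64Ux64 above shows the constant fast path in its simplest form:
// uint64(c) < 64 collapses to a bare SRLconst, and any larger constant
// folds the whole value to MOVDconst [0].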
func rewriteValueARM64_OpRsh64Ux8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64Ux8 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v0.AddArg(x)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpRsh64x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64x16 x y)
	// cond:
	// result: (SRA x (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v0.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v0.AddArg(v3)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpRsh64x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64x32 x y)
	// cond:
	// result: (SRA x (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v0.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v0.AddArg(v3)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpRsh64x64_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Rsh64x64 x (MOVDconst [c]))
	// cond: uint64(c) < 64
	// result: (SRAconst x [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 64) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = c
		v.AddArg(x)
		return true
	}
	// match: (Rsh64x64 x (MOVDconst [c]))
	// cond: uint64(c) >= 64
	// result: (SRAconst x [63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 64) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = 63
		v.AddArg(x)
		return true
	}
	// match: (Rsh64x64 x y)
	// cond:
	// result: (SRA x (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v0.AddArg(y)
		v1 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v1.AuxInt = 63
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v2.AuxInt = 64
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		return true
	}
}
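
// The signed 64-bit forms need no operand extension either; the only work
// left is clamping the amount to 63 (or, for constants, emitting SRAconst
// [c] / SRAconst [63] outright).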
func rewriteValueARM64_OpRsh64x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh64x8 x y)
	// cond:
	// result: (SRA x (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(y)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v0.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v4.AddArg(y)
		v3.AddArg(v4)
		v0.AddArg(v3)
		v.AddArg(v0)
		return true
	}
}
func rewriteValueARM64_OpRsh8Ux16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8Ux16 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh8Ux32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8Ux32 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh8Ux64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8Ux64 x (MOVDconst [c]))
	// cond: uint64(c) < 8
	// result: (SRLconst (ZeroExt8to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 8) {
			break
		}
		v.reset(OpARM64SRLconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh8Ux64 _ (MOVDconst [c]))
	// cond: uint64(c) >= 8
	// result: (MOVDconst [0])
	for {
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 8) {
			break
		}
		v.reset(OpARM64MOVDconst)
		v.AuxInt = 0
		return true
	}
	// match: (Rsh8Ux64 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v0.AddArg(y)
		v.AddArg(v0)
		v2 := b.NewValue0(v.Pos, OpConst64, t)
		v2.AuxInt = 0
		v.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v.AddArg(v3)
		return true
	}
}
func rewriteValueARM64_OpRsh8Ux8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8Ux8 <t> x y)
	// cond:
	// result: (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
	for {
		t := v.Type
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64CSELULT)
		v0 := b.NewValue0(v.Pos, OpARM64SRL, t)
		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v1.AddArg(x)
		v0.AddArg(v1)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v0.AddArg(v2)
		v.AddArg(v0)
		v3 := b.NewValue0(v.Pos, OpConst64, t)
		v3.AuxInt = 0
		v.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v.AddArg(v4)
		return true
	}
}
func rewriteValueARM64_OpRsh8x16_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8x16 x y)
	// cond:
	// result: (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt16to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh8x32_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8x32 x y)
	// cond:
	// result: (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt32to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh8x64_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8x64 x (MOVDconst [c]))
	// cond: uint64(c) < 8
	// result: (SRAconst (SignExt8to64 x) [c])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) < 8) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = c
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh8x64 x (MOVDconst [c]))
	// cond: uint64(c) >= 8
	// result: (SRAconst (SignExt8to64 x) [63])
	for {
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpARM64MOVDconst {
			break
		}
		c := v_1.AuxInt
		if !(uint64(c) >= 8) {
			break
		}
		v.reset(OpARM64SRAconst)
		v.AuxInt = 63
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (Rsh8x64 x y)
	// cond:
	// result: (SRA (SignExt8to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v1.AddArg(y)
		v2 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v2.AuxInt = 63
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v3.AuxInt = 64
		v3.AddArg(y)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
}
func rewriteValueARM64_OpRsh8x8_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Rsh8x8 x y)
	// cond:
	// result: (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SRA)
		v0 := b.NewValue0(v.Pos, OpSignExt8to64, types.Int64)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64CSELULT, y.Type)
		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v2.AddArg(y)
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpConst64, y.Type)
		v3.AuxInt = 63
		v1.AddArg(v3)
		v4 := b.NewValue0(v.Pos, OpARM64CMPconst, TypeFlags)
		v4.AuxInt = 64
		v5 := b.NewValue0(v.Pos, OpZeroExt8to64, types.UInt64)
		v5.AddArg(y)
		v4.AddArg(v5)
		v1.AddArg(v4)
		v.AddArg(v1)
		return true
	}
}
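
// The SignExt* lowerings that follow are single-instruction rewrites:
// MOVBreg, MOVHreg, and MOVWreg are the ARM64 sign-extending register
// moves for 8-, 16-, and 32-bit sources respectively, and the same
// instruction serves every destination width since registers hold 64 bits.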
func rewriteValueARM64_OpSignExt16to32_0(v *Value) bool {
	// match: (SignExt16to32 x)
	// cond:
	// result: (MOVHreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVHreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSignExt16to64_0(v *Value) bool {
	// match: (SignExt16to64 x)
	// cond:
	// result: (MOVHreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVHreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSignExt32to64_0(v *Value) bool {
	// match: (SignExt32to64 x)
	// cond:
	// result: (MOVWreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVWreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSignExt8to16_0(v *Value) bool {
	// match: (SignExt8to16 x)
	// cond:
	// result: (MOVBreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSignExt8to32_0(v *Value) bool {
	// match: (SignExt8to32 x)
	// cond:
	// result: (MOVBreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSignExt8to64_0(v *Value) bool {
	// match: (SignExt8to64 x)
	// cond:
	// result: (MOVBreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpSlicemask_0(v *Value) bool {
	b := v.Block
	_ = b
	// match: (Slicemask <t> x)
	// cond:
	// result: (SRAconst (NEG <t> x) [63])
	for {
		t := v.Type
		x := v.Args[0]
		v.reset(OpARM64SRAconst)
		v.AuxInt = 63
		v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
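
// Slicemask wants all-ones when x > 0 and all-zeros when x == 0 (x is a
// slice length or capacity, never negative). NEG makes x negative exactly
// when it was positive, so its sign bit is the answer, and SRAconst [63]
// smears that bit across the word.
//
// referenceSlicemask is a minimal Go sketch of the trick; the name is
// illustrative only, not part of the generated rules.
func referenceSlicemask(x int64) int64 {
	return -x >> 63 // 0 -> 0, positive -> -1 (all ones)
}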
func rewriteValueARM64_OpSqrt_0(v *Value) bool {
	// match: (Sqrt x)
	// cond:
	// result: (FSQRTD x)
	for {
		x := v.Args[0]
		v.reset(OpARM64FSQRTD)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpStaticCall_0(v *Value) bool {
	// match: (StaticCall [argwid] {target} mem)
	// cond:
	// result: (CALLstatic [argwid] {target} mem)
	for {
		argwid := v.AuxInt
		target := v.Aux
		mem := v.Args[0]
		v.reset(OpARM64CALLstatic)
		v.AuxInt = argwid
		v.Aux = target
		v.AddArg(mem)
		return true
	}
}
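
// Store carries its element type in Aux; the rules below dispatch purely on
// that type's size and floatness, so every generic Store becomes exactly one
// of MOVBstore, MOVHstore, MOVWstore, MOVDstore, FMOVSstore, or FMOVDstore.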
func rewriteValueARM64_OpStore_0(v *Value) bool {
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 1
	// result: (MOVBstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 1) {
			break
		}
		v.reset(OpARM64MOVBstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 2
	// result: (MOVHstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 2) {
			break
		}
		v.reset(OpARM64MOVHstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 4 && !is32BitFloat(val.Type)
	// result: (MOVWstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 4 && !is32BitFloat(val.Type)) {
			break
		}
		v.reset(OpARM64MOVWstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 8 && !is64BitFloat(val.Type)
	// result: (MOVDstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 8 && !is64BitFloat(val.Type)) {
			break
		}
		v.reset(OpARM64MOVDstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 4 && is32BitFloat(val.Type)
	// result: (FMOVSstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 4 && is32BitFloat(val.Type)) {
			break
		}
		v.reset(OpARM64FMOVSstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	// match: (Store {t} ptr val mem)
	// cond: t.(Type).Size() == 8 && is64BitFloat(val.Type)
	// result: (FMOVDstore ptr val mem)
	for {
		t := v.Aux
		ptr := v.Args[0]
		val := v.Args[1]
		mem := v.Args[2]
		if !(t.(Type).Size() == 8 && is64BitFloat(val.Type)) {
			break
		}
		v.reset(OpARM64FMOVDstore)
		v.AddArg(ptr)
		v.AddArg(val)
		v.AddArg(mem)
		return true
	}
	return false
}
func rewriteValueARM64_OpSub16_0(v *Value) bool {
	// match: (Sub16 x y)
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSub32_0(v *Value) bool {
	// match: (Sub32 x y)
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSub32F_0(v *Value) bool {
	// match: (Sub32F x y)
	// cond:
	// result: (FSUBS x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FSUBS)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSub64_0(v *Value) bool {
	// match: (Sub64 x y)
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSub64F_0(v *Value) bool {
	// match: (Sub64F x y)
	// cond:
	// result: (FSUBD x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64FSUBD)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSub8_0(v *Value) bool {
	// match: (Sub8 x y)
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpSubPtr_0(v *Value) bool {
	// match: (SubPtr x y)
	// cond:
	// result: (SUB x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64SUB)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
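
// The Trunc* lowerings that follow reduce to OpCopy: ARM64 keeps every value
// in a 64-bit register and the narrower operations only consume the low
// bits, so a truncation needs no instruction at all.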
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc16to8_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc16to8 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc32to16_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc32to16 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc32to8_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc32to8 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc64to16_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc64to16 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc64to32_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc64to32 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-20 15:47:06 -07:00
|
|
|
func rewriteValueARM64_OpTrunc64to8_0(v *Value) bool {
|
2016-08-03 09:56:36 -04:00
|
|
|
// match: (Trunc64to8 x)
|
|
|
|
|
// cond:
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v.Args[0]
|
|
|
|
|
v.reset(OpCopy)
|
|
|
|
|
v.Type = x.Type
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
func rewriteValueARM64_OpXor16_0(v *Value) bool {
	// match: (Xor16 x y)
	// cond:
	// result: (XOR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpXor32_0(v *Value) bool {
	// match: (Xor32 x y)
	// cond:
	// result: (XOR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpXor64_0(v *Value) bool {
	// match: (Xor64 x y)
	// cond:
	// result: (XOR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
func rewriteValueARM64_OpXor8_0(v *Value) bool {
	// match: (Xor8 x y)
	// cond:
	// result: (XOR x y)
	for {
		x := v.Args[0]
		y := v.Args[1]
		v.reset(OpARM64XOR)
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
}
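// Zero clears a block of memory. Small fixed sizes are lowered to a few
// explicit stores of a zero constant (MOVDconst [0]), using the widest
// store that fits: MOVBstore, MOVHstore, MOVWstore, and MOVDstore for
// 1, 2, 4, and 8 bytes respectively, chained for the in-between sizes.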
func rewriteValueARM64_OpZero_0(v *Value) bool {
	b := v.Block
	_ = b
	types := &b.Func.Config.Types
	_ = types
	// match: (Zero [0] _ mem)
	// cond:
	// result: mem
	for {
		if v.AuxInt != 0 {
			break
		}
		mem := v.Args[1]
		v.reset(OpCopy)
		v.Type = mem.Type
		v.AddArg(mem)
		return true
	}
	// match: (Zero [1] ptr mem)
	// cond:
	// result: (MOVBstore ptr (MOVDconst [0]) mem)
	for {
		if v.AuxInt != 1 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVBstore)
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Zero [2] ptr mem)
	// cond:
	// result: (MOVHstore ptr (MOVDconst [0]) mem)
	for {
		if v.AuxInt != 2 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVHstore)
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Zero [4] ptr mem)
	// cond:
	// result: (MOVWstore ptr (MOVDconst [0]) mem)
	for {
		if v.AuxInt != 4 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVWstore)
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Zero [8] ptr mem)
	// cond:
	// result: (MOVDstore ptr (MOVDconst [0]) mem)
	for {
		if v.AuxInt != 8 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVDstore)
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	// match: (Zero [3] ptr mem)
	// cond:
	// result: (MOVBstore [2] ptr (MOVDconst [0]) (MOVHstore ptr (MOVDconst [0]) mem))
	for {
		if v.AuxInt != 3 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 2
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, TypeMem)
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [5] ptr mem)
	// cond:
	// result: (MOVBstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem))
	for {
		if v.AuxInt != 5 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 4
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [6] ptr mem)
	// cond:
	// result: (MOVHstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem))
	for {
		if v.AuxInt != 6 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVHstore)
		v.AuxInt = 4
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [7] ptr mem)
	// cond:
	// result: (MOVBstore [6] ptr (MOVDconst [0]) (MOVHstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)))
	for {
		if v.AuxInt != 7 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVBstore)
		v.AuxInt = 6
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, TypeMem)
		v1.AuxInt = 4
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64MOVWstore, TypeMem)
		v3.AddArg(ptr)
		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v4.AuxInt = 0
		v3.AddArg(v4)
		v3.AddArg(mem)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [12] ptr mem)
	// cond:
	// result: (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
	for {
		if v.AuxInt != 12 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVWstore)
		v.AuxInt = 8
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	return false
}
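// The rule generator splits large rewrite functions into numbered chunks of
// ten rules each; rewriteValueARM64_OpZero_10 continues with the remaining
// Zero rules: the 16- and 24-byte cases, the odd-size split via OffPtr, the
// Duff's-device path (DUFFZERO), and the generic LoweredZero runtime loop.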
func rewriteValueARM64_OpZero_10(v *Value) bool {
	b := v.Block
	_ = b
	config := b.Func.Config
	_ = config
	types := &b.Func.Config.Types
	_ = types
	// match: (Zero [16] ptr mem)
	// cond:
	// result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
	for {
		if v.AuxInt != 16 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVDstore)
		v.AuxInt = 8
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [24] ptr mem)
	// cond:
	// result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
	for {
		if v.AuxInt != 24 {
			break
		}
		ptr := v.Args[0]
		mem := v.Args[1]
		v.reset(OpARM64MOVDstore)
		v.AuxInt = 16
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v0.AuxInt = 0
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v1.AuxInt = 8
		v1.AddArg(ptr)
		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v2.AuxInt = 0
		v1.AddArg(v2)
		v3 := b.NewValue0(v.Pos, OpARM64MOVDstore, TypeMem)
		v3.AddArg(ptr)
		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, types.UInt64)
		v4.AuxInt = 0
		v3.AddArg(v4)
		v3.AddArg(mem)
		v1.AddArg(v3)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [s] ptr mem)
	// cond: s%8 != 0 && s > 8
	// result: (Zero [s%8] (OffPtr <ptr.Type> ptr [s-s%8]) (Zero [s-s%8] ptr mem))
	for {
		s := v.AuxInt
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(s%8 != 0 && s > 8) {
			break
		}
		v.reset(OpZero)
		v.AuxInt = s % 8
		v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
		v0.AuxInt = s - s%8
		v0.AddArg(ptr)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpZero, TypeMem)
		v1.AuxInt = s - s%8
		v1.AddArg(ptr)
		v1.AddArg(mem)
		v.AddArg(v1)
		return true
	}
	// match: (Zero [s] ptr mem)
	// cond: s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice
	// result: (DUFFZERO [4 * (128 - int64(s/8))] ptr mem)
	for {
		s := v.AuxInt
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice) {
			break
		}
		v.reset(OpARM64DUFFZERO)
		v.AuxInt = 4 * (128 - int64(s/8))
		v.AddArg(ptr)
		v.AddArg(mem)
		return true
	}
	// match: (Zero [s] ptr mem)
	// cond: s%8 == 0 && (s > 8*128 || config.noDuffDevice)
	// result: (LoweredZero ptr (ADDconst <ptr.Type> [s-8] ptr) mem)
	for {
		s := v.AuxInt
		ptr := v.Args[0]
		mem := v.Args[1]
		if !(s%8 == 0 && (s > 8*128 || config.noDuffDevice)) {
			break
		}
		v.reset(OpARM64LoweredZero)
		v.AddArg(ptr)
		v0 := b.NewValue0(v.Pos, OpARM64ADDconst, ptr.Type)
		v0.AuxInt = s - 8
		v0.AddArg(ptr)
		v.AddArg(v0)
		v.AddArg(mem)
		return true
	}
	return false
}
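// The ZeroExt* rules map zero-extensions onto the unsigned move
// instructions: MOVBUreg, MOVHUreg, and MOVWUreg clear the high bits of
// the destination register for the 8-, 16-, and 32-bit sources.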
func rewriteValueARM64_OpZeroExt16to32_0(v *Value) bool {
	// match: (ZeroExt16to32 x)
	// cond:
	// result: (MOVHUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVHUreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpZeroExt16to64_0(v *Value) bool {
	// match: (ZeroExt16to64 x)
	// cond:
	// result: (MOVHUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVHUreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpZeroExt32to64_0(v *Value) bool {
	// match: (ZeroExt32to64 x)
	// cond:
	// result: (MOVWUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVWUreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpZeroExt8to16_0(v *Value) bool {
	// match: (ZeroExt8to16 x)
	// cond:
	// result: (MOVBUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBUreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpZeroExt8to32_0(v *Value) bool {
	// match: (ZeroExt8to32 x)
	// cond:
	// result: (MOVBUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBUreg)
		v.AddArg(x)
		return true
	}
}
func rewriteValueARM64_OpZeroExt8to64_0(v *Value) bool {
	// match: (ZeroExt8to64 x)
	// cond:
	// result: (MOVBUreg x)
	for {
		x := v.Args[0]
		v.reset(OpARM64MOVBUreg)
		v.AddArg(x)
		return true
	}
}
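// rewriteBlockARM64 optimizes control-flow blocks. Comparisons against
// constant flag values (FlagEQ, FlagLT_ULT, ...) are decided at compile time
// by converting the block to BlockFirst and, where the branch sense flips,
// calling swapSuccessors; InvertFlags is absorbed by switching to the
// opposite-direction conditional block instead of swapping operands.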
func rewriteBlockARM64(b *Block) bool {
	config := b.Func.Config
	_ = config
	fe := b.Func.fe
	_ = fe
	types := &config.Types
	_ = types
	switch b.Kind {
	case BlockARM64EQ:
		// match: (EQ (CMPconst [0] x) yes no)
		// cond:
		// result: (Z x yes no)
		for {
			v := b.Control
			if v.Op != OpARM64CMPconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			x := v.Args[0]
			b.Kind = BlockARM64Z
			b.SetControl(x)
			return true
		}
		// match: (EQ (CMPWconst [0] x) yes no)
		// cond:
		// result: (ZW x yes no)
		for {
			v := b.Control
			if v.Op != OpARM64CMPWconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			x := v.Args[0]
			b.Kind = BlockARM64ZW
			b.SetControl(x)
			return true
		}
		// match: (EQ (FlagEQ) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (EQ (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (EQ (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (EQ (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (EQ (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (EQ (InvertFlags cmp) yes no)
		// cond:
		// result: (EQ cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64EQ
			b.SetControl(cmp)
			return true
		}
	case BlockARM64GE:
		// match: (GE (FlagEQ) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (GE (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (GE (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (GE (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (GE (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (GE (InvertFlags cmp) yes no)
		// cond:
		// result: (LE cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64LE
			b.SetControl(cmp)
			return true
		}
	case BlockARM64GT:
		// match: (GT (FlagEQ) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (GT (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (GT (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (GT (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (GT (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (GT (InvertFlags cmp) yes no)
		// cond:
		// result: (LT cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64LT
			b.SetControl(cmp)
			return true
		}
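	// A generic If block branches on a boolean. When that boolean is a
	// materialized ARM64 condition (Equal, LessThan, ...), branch on the
	// underlying flags directly; otherwise fall through to the final rule,
	// which tests the value against zero with NZ.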
	case BlockIf:
		// match: (If (Equal cc) yes no)
		// cond:
		// result: (EQ cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64Equal {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64EQ
			b.SetControl(cc)
			return true
		}
		// match: (If (NotEqual cc) yes no)
		// cond:
		// result: (NE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64NotEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64NE
			b.SetControl(cc)
			return true
		}
		// match: (If (LessThan cc) yes no)
		// cond:
		// result: (LT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessThan {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64LT
			b.SetControl(cc)
			return true
		}
		// match: (If (LessThanU cc) yes no)
		// cond:
		// result: (ULT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessThanU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64ULT
			b.SetControl(cc)
			return true
		}
		// match: (If (LessEqual cc) yes no)
		// cond:
		// result: (LE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64LE
			b.SetControl(cc)
			return true
		}
		// match: (If (LessEqualU cc) yes no)
		// cond:
		// result: (ULE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessEqualU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64ULE
			b.SetControl(cc)
			return true
		}
		// match: (If (GreaterThan cc) yes no)
		// cond:
		// result: (GT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterThan {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64GT
			b.SetControl(cc)
			return true
		}
		// match: (If (GreaterThanU cc) yes no)
		// cond:
		// result: (UGT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterThanU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64UGT
			b.SetControl(cc)
			return true
		}
		// match: (If (GreaterEqual cc) yes no)
		// cond:
		// result: (GE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64GE
			b.SetControl(cc)
			return true
		}
		// match: (If (GreaterEqualU cc) yes no)
		// cond:
		// result: (UGE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterEqualU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64UGE
			b.SetControl(cc)
			return true
		}
		// match: (If cond yes no)
		// cond:
		// result: (NZ cond yes no)
		for {
			v := b.Control
			_ = v
			cond := b.Control
			b.Kind = BlockARM64NZ
			b.SetControl(cond)
			return true
		}
	case BlockARM64LE:
		// match: (LE (FlagEQ) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (LE (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (LE (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (LE (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (LE (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (LE (InvertFlags cmp) yes no)
		// cond:
		// result: (GE cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64GE
			b.SetControl(cmp)
			return true
		}
	case BlockARM64LT:
		// match: (LT (FlagEQ) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (LT (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (LT (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (LT (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (LT (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (LT (InvertFlags cmp) yes no)
		// cond:
		// result: (GT cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64GT
			b.SetControl(cmp)
			return true
		}
	case BlockARM64NE:
		// match: (NE (CMPconst [0] x) yes no)
		// cond:
		// result: (NZ x yes no)
		for {
			v := b.Control
			if v.Op != OpARM64CMPconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			x := v.Args[0]
			b.Kind = BlockARM64NZ
			b.SetControl(x)
			return true
		}
		// match: (NE (CMPWconst [0] x) yes no)
		// cond:
		// result: (NZW x yes no)
		for {
			v := b.Control
			if v.Op != OpARM64CMPWconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			x := v.Args[0]
			b.Kind = BlockARM64NZW
			b.SetControl(x)
			return true
		}
		// match: (NE (FlagEQ) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (NE (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (NE (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (NE (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (NE (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (NE (InvertFlags cmp) yes no)
		// cond:
		// result: (NE cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64NE
			b.SetControl(cmp)
			return true
		}
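	// NZ branches when its control value is non-zero. Like If, it folds a
	// materialized comparison pseudo-op back into a flag-based branch, and
	// it resolves MOVDconst controls at compile time.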
	case BlockARM64NZ:
		// match: (NZ (Equal cc) yes no)
		// cond:
		// result: (EQ cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64Equal {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64EQ
			b.SetControl(cc)
			return true
		}
		// match: (NZ (NotEqual cc) yes no)
		// cond:
		// result: (NE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64NotEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64NE
			b.SetControl(cc)
			return true
		}
		// match: (NZ (LessThan cc) yes no)
		// cond:
		// result: (LT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessThan {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64LT
			b.SetControl(cc)
			return true
		}
		// match: (NZ (LessThanU cc) yes no)
		// cond:
		// result: (ULT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessThanU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64ULT
			b.SetControl(cc)
			return true
		}
		// match: (NZ (LessEqual cc) yes no)
		// cond:
		// result: (LE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64LE
			b.SetControl(cc)
			return true
		}
		// match: (NZ (LessEqualU cc) yes no)
		// cond:
		// result: (ULE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64LessEqualU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64ULE
			b.SetControl(cc)
			return true
		}
		// match: (NZ (GreaterThan cc) yes no)
		// cond:
		// result: (GT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterThan {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64GT
			b.SetControl(cc)
			return true
		}
		// match: (NZ (GreaterThanU cc) yes no)
		// cond:
		// result: (UGT cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterThanU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64UGT
			b.SetControl(cc)
			return true
		}
		// match: (NZ (GreaterEqual cc) yes no)
		// cond:
		// result: (GE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterEqual {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64GE
			b.SetControl(cc)
			return true
		}
		// match: (NZ (GreaterEqualU cc) yes no)
		// cond:
		// result: (UGE cc yes no)
		for {
			v := b.Control
			if v.Op != OpARM64GreaterEqualU {
				break
			}
			cc := v.Args[0]
			b.Kind = BlockARM64UGE
			b.SetControl(cc)
			return true
		}
		// match: (NZ (MOVDconst [0]) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (NZ (MOVDconst [c]) yes no)
		// cond: c != 0
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(c != 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
	case BlockARM64NZW:
		// match: (NZW (MOVDconst [c]) yes no)
		// cond: int32(c) == 0
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(int32(c) == 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (NZW (MOVDconst [c]) yes no)
		// cond: int32(c) != 0
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(int32(c) != 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
	case BlockARM64UGE:
		// match: (UGE (FlagEQ) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (UGE (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (UGE (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (UGE (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (UGE (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (UGE (InvertFlags cmp) yes no)
		// cond:
		// result: (ULE cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64ULE
			b.SetControl(cmp)
			return true
		}
	case BlockARM64UGT:
		// match: (UGT (FlagEQ) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (UGT (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (UGT (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (UGT (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (UGT (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (UGT (InvertFlags cmp) yes no)
		// cond:
		// result: (ULT cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64ULT
			b.SetControl(cmp)
			return true
		}
	case BlockARM64ULE:
		// match: (ULE (FlagEQ) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ULE (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ULE (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (ULE (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ULE (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (ULE (InvertFlags cmp) yes no)
		// cond:
		// result: (UGE cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64UGE
			b.SetControl(cmp)
			return true
		}
	case BlockARM64ULT:
		// match: (ULT (FlagEQ) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagEQ {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (ULT (FlagLT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ULT (FlagLT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagLT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (ULT (FlagGT_ULT) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_ULT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ULT (FlagGT_UGT) yes no)
		// cond:
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64FlagGT_UGT {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
		// match: (ULT (InvertFlags cmp) yes no)
		// cond:
		// result: (UGT cmp yes no)
		for {
			v := b.Control
			if v.Op != OpARM64InvertFlags {
				break
			}
			cmp := v.Args[0]
			b.Kind = BlockARM64UGT
			b.SetControl(cmp)
			return true
		}
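	// Z and ZW branch when a 64-bit (respectively 32-bit) value is zero;
	// with a constant control the branch is decided statically, mirroring
	// the NZ/NZW rules above with the successors reversed.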
	case BlockARM64Z:
		// match: (Z (MOVDconst [0]) yes no)
		// cond:
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			if v.AuxInt != 0 {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (Z (MOVDconst [c]) yes no)
		// cond: c != 0
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(c != 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
	case BlockARM64ZW:
		// match: (ZW (MOVDconst [c]) yes no)
		// cond: int32(c) == 0
		// result: (First nil yes no)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(int32(c) == 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			return true
		}
		// match: (ZW (MOVDconst [c]) yes no)
		// cond: int32(c) != 0
		// result: (First nil no yes)
		for {
			v := b.Control
			if v.Op != OpARM64MOVDconst {
				break
			}
			c := v.AuxInt
			if !(int32(c) != 0) {
				break
			}
			b.Kind = BlockFirst
			b.SetControl(nil)
			b.swapSuccessors()
			return true
		}
	}
	return false
}