cmd/compile: memory clearing optimization for arm64

Use "STP (ZR, ZR), O(R)" instead of "MOVD ZR, O(R)" to implement memory clearing.
Also improve assembler support for STP/LDP.
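
For illustration (not part of the CL itself), this is the kind of Go code whose generated zeroing sequence changes; the type and function names here are made up for the example:

	package main

	import "fmt"

	// fat is 64 bytes; after this change, *p = fat{} on arm64 can lower to
	// four STP (ZR, ZR) stores instead of eight MOVD ZR stores.
	type fat struct{ a, b, c, d, e, f, g, h uint64 }

	//go:noinline
	func clearFat(p *fat) {
		*p = fat{}
	}

	func main() {
		x := fat{1, 2, 3, 4, 5, 6, 7, 8}
		clearFat(&x)
		fmt.Println(x) // {0 0 0 0 0 0 0 0}
	}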
Results (Cortex-A57, 2 GHz, 8 cores):

benchmark                   old ns/op     new ns/op     delta
BenchmarkClearFat8-8        1.00          1.00          +0.00%
BenchmarkClearFat12-8       1.01          1.01          +0.00%
BenchmarkClearFat16-8       1.01          1.01          +0.00%
BenchmarkClearFat24-8       1.52          1.52          +0.00%
BenchmarkClearFat32-8       3.00          2.02          -32.67%
BenchmarkClearFat40-8       3.50          2.52          -28.00%
BenchmarkClearFat48-8       3.50          3.03          -13.43%
BenchmarkClearFat56-8       4.00          3.50          -12.50%
BenchmarkClearFat64-8       4.25          4.00          -5.88%
BenchmarkClearFat128-8      8.01          8.01          +0.00%
BenchmarkClearFat256-8      16.1          16.0          -0.62%
BenchmarkClearFat512-8      32.1          32.0          -0.31%
BenchmarkClearFat1024-8     64.1          64.1          +0.00%
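
The ClearFat benchmarks come from the runtime's memmove tests; a hedged reconstruction of their shape (the actual harness may differ in detail):

	package runtime_test

	import "testing"

	// BenchmarkClearFat32-style loop: a fresh 32-byte value is zeroed each
	// iteration, so the compiler's zeroing sequence dominates the measurement.
	// 32 bytes is the first size in the table where STP halves the store count.
	func BenchmarkClearFat32(b *testing.B) {
		for i := 0; i < b.N; i++ {
			var t [32 / 4]uint32
			_ = t
		}
	}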

Change-Id: Ie5f5eac271ff685884775005825f206167a5c146
Reviewed-on: https://go-review.googlesource.com/55610
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Wei Xiao 2017-07-27 01:55:03 +00:00 committed by Cherry Zhang
parent 9c99512d18
commit c02fc1605a
11 changed files with 930 additions and 255 deletions

--- a/src/cmd/compile/internal/arm64/ggen.go
+++ b/src/cmd/compile/internal/arm64/ggen.go

@@ -31,13 +31,18 @@ func zerorange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
 			p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off+i)
 		}
 	} else if cnt <= int64(128*gc.Widthptr) && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend
+		if cnt%(2*int64(gc.Widthptr)) != 0 {
+			p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off)
+			off += int64(gc.Widthptr)
+			cnt -= int64(gc.Widthptr)
+		}
 		p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0)
-		p = pp.Appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, arm64.REGRT1, 0)
+		p = pp.Appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REGRT1, 0)
 		p.Reg = arm64.REGRT1
 		p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
 		p.To.Name = obj.NAME_EXTERN
 		p.To.Sym = gc.Duffzero
-		p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr))
+		p.To.Offset = 4 * (64 - cnt/(2*int64(gc.Widthptr)))
 	} else {
 		p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, arm64.REGTMP, 0)
 		p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0)

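In words: when the frame area to clear is not a multiple of 16 bytes, the new zerorange first emits a single 8-byte MOVD ZR store, so the remainder handed to duffzero is a whole number of 16-byte STP pairs. A minimal sketch of that accounting (names are illustrative, not the compiler's):

	package main

	import "fmt"

	// peelOddWord models the new prologue in zerorange: peel one 8-byte word
	// off a region of cnt bytes at offset off so that the remainder is a
	// whole number of 16-byte STP pairs.
	func peelOddWord(off, cnt int64) (newOff, newCnt int64, peeled bool) {
		const wordSize = 8
		if cnt%(2*wordSize) != 0 {
			return off + wordSize, cnt - wordSize, true // MOVD ZR store first
		}
		return off, cnt, false
	}

	func main() {
		off, cnt, peeled := peelOddWord(0, 40)
		fmt.Println(off, cnt, peeled) // 8 32 true: 32 bytes left for two STPs
	}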
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go

@@ -324,6 +324,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = v.Args[0].Reg()
 		gc.AddAux(&p.To, v)
+	case ssa.OpARM64STP:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REGREG
+		p.From.Reg = v.Args[1].Reg()
+		p.From.Offset = int64(v.Args[2].Reg())
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		gc.AddAux(&p.To, v)
 	case ssa.OpARM64MOVBstorezero,
 		ssa.OpARM64MOVHstorezero,
 		ssa.OpARM64MOVWstorezero,
@@ -334,6 +342,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = v.Args[0].Reg()
 		gc.AddAux(&p.To, v)
+	case ssa.OpARM64MOVQstorezero:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REGREG
+		p.From.Reg = arm64.REGZERO
+		p.From.Offset = int64(arm64.REGZERO)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		gc.AddAux(&p.To, v)
 	case ssa.OpARM64LoweredAtomicExchange64,
 		ssa.OpARM64LoweredAtomicExchange32:
 		// LDAXR	(Rarg0), Rout
@@ -559,30 +575,25 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
 	case ssa.OpARM64DUFFZERO:
-		// runtime.duffzero expects start address - 8 in R16
-		p := s.Prog(arm64.ASUB)
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = 8
-		p.Reg = v.Args[0].Reg()
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = arm64.REG_R16
-		p = s.Prog(obj.ADUFFZERO)
+		// runtime.duffzero expects start address in R16
+		p := s.Prog(obj.ADUFFZERO)
 		p.To.Type = obj.TYPE_MEM
 		p.To.Name = obj.NAME_EXTERN
 		p.To.Sym = gc.Duffzero
 		p.To.Offset = v.AuxInt
 	case ssa.OpARM64LoweredZero:
-		// MOVD.P	ZR, 8(R16)
+		// STP.P	(ZR,ZR), 16(R16)
 		// CMP	Rarg1, R16
 		// BLE	-2(PC)
-		// arg1 is the address of the last element to zero
-		p := s.Prog(arm64.AMOVD)
+		// arg1 is the address of the last 16-byte unit to zero
+		p := s.Prog(arm64.ASTP)
 		p.Scond = arm64.C_XPOST
-		p.From.Type = obj.TYPE_REG
+		p.From.Type = obj.TYPE_REGREG
 		p.From.Reg = arm64.REGZERO
+		p.From.Offset = int64(arm64.REGZERO)
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = arm64.REG_R16
-		p.To.Offset = 8
+		p.To.Offset = 16
 		p2 := s.Prog(arm64.ACMP)
 		p2.From.Type = obj.TYPE_REG
 		p2.From.Reg = v.Args[1].Reg()

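The LoweredZero lowering above now clears 16 bytes per iteration with a post-incremented STP. A rough Go model of the loop's pointer arithmetic (purely illustrative, not compiler code):

	package main

	import "fmt"

	// zeroLoop models the three-instruction arm64 loop:
	//	STP.P (ZR, ZR), 16(R16)  // clear 16 bytes, then R16 += 16
	//	CMP   Rarg1, R16
	//	BLE   -2(PC)
	// where Rarg1 holds the address of the last 16-byte unit to zero.
	func zeroLoop(buf []byte) {
		for off := 0; off+16 <= len(buf); off += 16 { // one STP.P per iteration
			for j := 0; j < 16; j++ {
				buf[off+j] = 0
			}
		}
	}

	func main() {
		b := []byte("0123456789abcdef0123456789abcdef")
		zeroLoop(b)
		fmt.Println(b[0] == 0 && b[31] == 0) // true
	}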
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules

@@ -365,36 +365,69 @@
 (MOVBstore [6] ptr (MOVDconst [0])
 	(MOVHstore [4] ptr (MOVDconst [0])
 		(MOVWstore ptr (MOVDconst [0]) mem)))
+(Zero [9] ptr mem) ->
+	(MOVBstore [8] ptr (MOVDconst [0])
+		(MOVDstore ptr (MOVDconst [0]) mem))
+(Zero [10] ptr mem) ->
+	(MOVHstore [8] ptr (MOVDconst [0])
+		(MOVDstore ptr (MOVDconst [0]) mem))
+(Zero [11] ptr mem) ->
+	(MOVBstore [10] ptr (MOVDconst [0])
+		(MOVHstore [8] ptr (MOVDconst [0])
+			(MOVDstore ptr (MOVDconst [0]) mem)))
 (Zero [12] ptr mem) ->
 	(MOVWstore [8] ptr (MOVDconst [0])
 		(MOVDstore ptr (MOVDconst [0]) mem))
-(Zero [16] ptr mem) ->
-	(MOVDstore [8] ptr (MOVDconst [0])
-		(MOVDstore ptr (MOVDconst [0]) mem))
-(Zero [24] ptr mem) ->
-	(MOVDstore [16] ptr (MOVDconst [0])
-		(MOVDstore [8] ptr (MOVDconst [0])
-			(MOVDstore ptr (MOVDconst [0]) mem)))
+(Zero [13] ptr mem) ->
+	(MOVBstore [12] ptr (MOVDconst [0])
+		(MOVWstore [8] ptr (MOVDconst [0])
+			(MOVDstore ptr (MOVDconst [0]) mem)))
+(Zero [14] ptr mem) ->
+	(MOVHstore [12] ptr (MOVDconst [0])
+		(MOVWstore [8] ptr (MOVDconst [0])
+			(MOVDstore ptr (MOVDconst [0]) mem)))
+(Zero [15] ptr mem) ->
+	(MOVBstore [14] ptr (MOVDconst [0])
+		(MOVHstore [12] ptr (MOVDconst [0])
+			(MOVWstore [8] ptr (MOVDconst [0])
+				(MOVDstore ptr (MOVDconst [0]) mem))))
+(Zero [16] ptr mem) ->
+	(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
+(Zero [32] ptr mem) ->
+	(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
+		(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
+(Zero [48] ptr mem) ->
+	(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
+		(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
+			(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
+(Zero [64] ptr mem) ->
+	(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
+		(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
+			(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
+				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))

 // strip off fractional word zeroing
-(Zero [s] ptr mem) && s%8 != 0 && s > 8 ->
-	(Zero [s%8]
-		(OffPtr <ptr.Type> ptr [s-s%8])
-		(Zero [s-s%8] ptr mem))
+(Zero [s] ptr mem) && s%16 != 0 && s > 16 ->
+	(Zero [s-s%16]
+		(OffPtr <ptr.Type> ptr [s%16])
+		(Zero [s%16] ptr mem))

 // medium zeroing uses a duff device
-// 4, 8, and 128 are magic constants, see runtime/mkduff.go
+// 4, 16, and 64 are magic constants, see runtime/mkduff.go
 (Zero [s] ptr mem)
-	&& s%8 == 0 && s > 24 && s <= 8*128
+	&& s%16 == 0 && s > 64 && s <= 16*64
 	&& !config.noDuffDevice ->
-	(DUFFZERO [4 * (128 - int64(s/8))] ptr mem)
+	(DUFFZERO [4 * (64 - int64(s/16))] ptr mem)

 // large zeroing uses a loop
 (Zero [s] ptr mem)
-	&& s%8 == 0 && (s > 8*128 || config.noDuffDevice) ->
+	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice) ->
 	(LoweredZero
 		ptr
-		(ADDconst <ptr.Type> [s-8] ptr)
+		(ADDconst <ptr.Type> [s-16] ptr)
 		mem)

 // moves
@@ -571,6 +604,9 @@
 (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVDstore [off1+off2] {sym} ptr val mem)
+(STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(off1+off2)
+	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
+	(STP [off1+off2] {sym} ptr val1 val2 mem)
 (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(FMOVSstore [off1+off2] {sym} ptr val mem)
@@ -589,6 +625,9 @@
 (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVDstorezero [off1+off2] {sym} ptr mem)
+(MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
+	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
+	(MOVQstorezero [off1+off2] {sym} ptr mem)
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
 	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
@@ -643,6 +682,10 @@
 	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
+	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
+	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
+	(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
 (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
 	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
@@ -667,12 +710,17 @@
 	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
 	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
+	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
+	(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)

 // store zero
 (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
 (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
 (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
 (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
+(STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem)

 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 // these seem to have bad interaction with other rules, resulting in slower code

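About the DUFFZERO offset in the rules above: each STP.P in the new duffzero body is one 4-byte instruction that clears 16 bytes, so a zeroing of s bytes enters the device 4 * (64 - s/16) bytes past its start, skipping the stores it does not need. A quick check of that arithmetic (plain Go, just reproducing the rule's formula):

	package main

	import "fmt"

	// duffzeroEntry returns the byte offset into runtime·duffzero at which a
	// zeroing of s bytes must enter; the rules use it for 64 < s <= 1024 with
	// s%16 == 0, but the formula is valid for any multiple of 16 up to 1024.
	func duffzeroEntry(s int64) int64 {
		return 4 * (64 - s/16)
	}

	func main() {
		fmt.Println(duffzeroEntry(1024)) // 0: execute all 64 paired stores
		fmt.Println(duffzeroEntry(80))   // 236: execute only the last 5 stores
	}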
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go

@@ -144,6 +144,7 @@ func init() {
 		gpload   = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
 		gpstore  = regInfo{inputs: []regMask{gpspsbg, gpg}}
 		gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
+		gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
 		gpxchg   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
 		gpcas    = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}}
 		fp01     = regInfo{inputs: nil, outputs: []regMask{fp}}
@@ -275,13 +276,15 @@ func init() {
 		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
+		{name: "STP", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},         // store 16 bytes of arg1 and arg2 to arg0 + auxInt + aux.  arg3=mem.
 		{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
-		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux.  ar12=mem.
+		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // store 16 bytes of zero to arg0 + auxInt + aux.  arg1=mem.

 		// conversions
 		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte
@@ -347,7 +350,7 @@ func init() {
 			aux:       "Int64",
 			argLength: 2,
 			reg: regInfo{
-				inputs:   []regMask{gp},
+				inputs:   []regMask{buildReg("R16")},
 				clobbers: buildReg("R16 R30"),
 			},
 			faultOnNilArg0: true,
@@ -355,14 +358,14 @@
 		// large zeroing
 		// arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect)
-		// arg1 = address of the last element to zero
+		// arg1 = address of the last 16-byte unit to zero
 		// arg2 = mem
 		// returns mem
-		//	MOVD.P	ZR, 8(R16)
+		//	STP.P	(ZR,ZR), 16(R16)
 		//	CMP	Rarg1, R16
 		//	BLE	-2(PC)
 		// Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled.
-		// the-end-of-the-memory - 8 is with the area to zero, ok to spill.
+		// the-end-of-the-memory - 16 is with the area to zero, ok to spill.
 		{
 			name:      "LoweredZero",
 			argLength: 3,

--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go

@@ -999,12 +999,14 @@ const (
 	OpARM64MOVHstore
 	OpARM64MOVWstore
 	OpARM64MOVDstore
+	OpARM64STP
 	OpARM64FMOVSstore
 	OpARM64FMOVDstore
 	OpARM64MOVBstorezero
 	OpARM64MOVHstorezero
 	OpARM64MOVWstorezero
 	OpARM64MOVDstorezero
+	OpARM64MOVQstorezero
 	OpARM64MOVBreg
 	OpARM64MOVBUreg
 	OpARM64MOVHreg
@@ -12636,6 +12638,21 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "STP",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            arm64.ASTP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+		},
+	},
 	{
 		name:    "FMOVSstore",
 		auxType: auxSymOff,
@@ -12716,6 +12733,19 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "MOVQstorezero",
+		auxType:        auxSymOff,
+		argLen:         2,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            arm64.ASTP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+		},
+	},
 	{
 		name:   "MOVBreg",
 		argLen: 1,
@@ -13227,7 +13257,7 @@ var opcodeTable = [...]opInfo{
 		faultOnNilArg0: true,
 		reg: regInfo{
 			inputs: []inputInfo{
-				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+				{0, 65536}, // R16
 			},
 			clobbers: 536936448, // R16 R30
 		},

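The register masks in opGen are bitmasks indexed by register number, so the new DUFFZERO input constraint 65536 is 1<<16, i.e. exactly R16. A quick decode (assuming, as the comments above do, that bit i corresponds to Ri):

	package main

	import "fmt"

	func main() {
		const duffzeroInput = 65536 // the new DUFFZERO input mask
		for i := 0; i < 32; i++ {
			if duffzeroInput&(1<<uint(i)) != 0 {
				fmt.Printf("R%d\n", i) // prints: R16
			}
		}
	}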
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go

@@ -129,6 +129,8 @@ func rewriteValueARM64(v *Value) bool {
 		return rewriteValueARM64_OpARM64MOVHstore_0(v)
 	case OpARM64MOVHstorezero:
 		return rewriteValueARM64_OpARM64MOVHstorezero_0(v)
+	case OpARM64MOVQstorezero:
+		return rewriteValueARM64_OpARM64MOVQstorezero_0(v)
 	case OpARM64MOVWUload:
 		return rewriteValueARM64_OpARM64MOVWUload_0(v)
 	case OpARM64MOVWUreg:
@@ -173,6 +175,8 @@ func rewriteValueARM64(v *Value) bool {
 		return rewriteValueARM64_OpARM64SRL_0(v)
 	case OpARM64SRLconst:
 		return rewriteValueARM64_OpARM64SRLconst_0(v)
+	case OpARM64STP:
+		return rewriteValueARM64_OpARM64STP_0(v)
 	case OpARM64SUB:
 		return rewriteValueARM64_OpARM64SUB_0(v)
 	case OpARM64SUBconst:
@@ -704,7 +708,7 @@ func rewriteValueARM64(v *Value) bool {
 	case OpXor8:
 		return rewriteValueARM64_OpXor8_0(v)
 	case OpZero:
-		return rewriteValueARM64_OpZero_0(v) || rewriteValueARM64_OpZero_10(v)
+		return rewriteValueARM64_OpZero_0(v) || rewriteValueARM64_OpZero_10(v) || rewriteValueARM64_OpZero_20(v)
 	case OpZeroExt16to32:
 		return rewriteValueARM64_OpZeroExt16to32_0(v)
 	case OpZeroExt16to64:
@@ -4983,6 +4987,62 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	// match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+	// result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+			break
+		}
+		v.reset(OpARM64MOVQstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+	// result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+			break
+		}
+		v.reset(OpARM64MOVQstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -9174,6 +9234,100 @@ func rewriteValueARM64_OpARM64SRLconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64STP_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	// match: (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem)
+	// cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+	// result: (STP [off1+off2] {sym} ptr val1 val2 mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[3]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		val1 := v.Args[1]
+		val2 := v.Args[2]
+		mem := v.Args[3]
+		if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+			break
+		}
+		v.reset(OpARM64STP)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(val1)
+		v.AddArg(val2)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
+	// cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+	// result: (STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[3]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		val1 := v.Args[1]
+		val2 := v.Args[2]
+		mem := v.Args[3]
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+			break
+		}
+		v.reset(OpARM64STP)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(val1)
+		v.AddArg(val2)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem)
+	// cond:
+	// result: (MOVQstorezero [off] {sym} ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[3]
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		v_2 := v.Args[2]
+		if v_2.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_2.AuxInt != 0 {
+			break
+		}
+		mem := v.Args[3]
+		v.reset(OpARM64MOVQstorezero)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -16225,6 +16379,95 @@ func rewriteValueARM64_OpZero_0(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
+	// match: (Zero [9] ptr mem)
+	// cond:
+	// result: (MOVBstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
+	for {
+		if v.AuxInt != 9 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVBstore)
+		v.AuxInt = 8
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v1.AddArg(ptr)
+		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v2.AuxInt = 0
+		v1.AddArg(v2)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpZero_10(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
+	// match: (Zero [10] ptr mem)
+	// cond:
+	// result: (MOVHstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
+	for {
+		if v.AuxInt != 10 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVHstore)
+		v.AuxInt = 8
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v1.AddArg(ptr)
+		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v2.AuxInt = 0
+		v1.AddArg(v2)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
+	// match: (Zero [11] ptr mem)
+	// cond:
+	// result: (MOVBstore [10] ptr (MOVDconst [0]) (MOVHstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
+	for {
+		if v.AuxInt != 11 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVBstore)
+		v.AuxInt = 10
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, types.TypeMem)
+		v1.AuxInt = 8
+		v1.AddArg(ptr)
+		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v2.AuxInt = 0
+		v1.AddArg(v2)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v3.AddArg(ptr)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v3.AddArg(v4)
+		v3.AddArg(mem)
+		v1.AddArg(v3)
+		v.AddArg(v1)
+		return true
+	}
 	// match: (Zero [12] ptr mem)
 	// cond:
 	// result: (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
@@ -16250,57 +16493,23 @@ func rewriteValueARM64_OpZero_0(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
-	return false
-}
-func rewriteValueARM64_OpZero_10(v *Value) bool {
-	b := v.Block
-	_ = b
-	config := b.Func.Config
-	_ = config
-	typ := &b.Func.Config.Types
-	_ = typ
-	// match: (Zero [16] ptr mem)
+	// match: (Zero [13] ptr mem)
 	// cond:
-	// result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
+	// result: (MOVBstore [12] ptr (MOVDconst [0]) (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
 	for {
-		if v.AuxInt != 16 {
+		if v.AuxInt != 13 {
 			break
 		}
 		_ = v.Args[1]
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		v.reset(OpARM64MOVDstore)
-		v.AuxInt = 8
+		v.reset(OpARM64MOVBstore)
+		v.AuxInt = 12
 		v.AddArg(ptr)
 		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
 		v0.AuxInt = 0
 		v.AddArg(v0)
-		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
-		v1.AddArg(ptr)
-		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
-		v2.AuxInt = 0
-		v1.AddArg(v2)
-		v1.AddArg(mem)
-		v.AddArg(v1)
-		return true
-	}
-	// match: (Zero [24] ptr mem)
-	// cond:
-	// result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
-	for {
-		if v.AuxInt != 24 {
-			break
-		}
-		_ = v.Args[1]
-		ptr := v.Args[0]
-		mem := v.Args[1]
-		v.reset(OpARM64MOVDstore)
-		v.AuxInt = 16
-		v.AddArg(ptr)
-		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
-		v0.AuxInt = 0
-		v.AddArg(v0)
-		v1 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, types.TypeMem)
 		v1.AuxInt = 8
 		v1.AddArg(ptr)
 		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
@@ -16316,62 +16525,288 @@ func rewriteValueARM64_OpZero_10(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
+	// match: (Zero [14] ptr mem)
+	// cond:
+	// result: (MOVHstore [12] ptr (MOVDconst [0]) (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
+	for {
+		if v.AuxInt != 14 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVHstore)
+		v.AuxInt = 12
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVWstore, types.TypeMem)
+		v1.AuxInt = 8
+		v1.AddArg(ptr)
+		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v2.AuxInt = 0
+		v1.AddArg(v2)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v3.AddArg(ptr)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v3.AddArg(v4)
+		v3.AddArg(mem)
+		v1.AddArg(v3)
+		v.AddArg(v1)
+		return true
+	}
+	// match: (Zero [15] ptr mem)
+	// cond:
+	// result: (MOVBstore [14] ptr (MOVDconst [0]) (MOVHstore [12] ptr (MOVDconst [0]) (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))))
+	for {
+		if v.AuxInt != 15 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVBstore)
+		v.AuxInt = 14
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVHstore, types.TypeMem)
+		v1.AuxInt = 12
+		v1.AddArg(ptr)
+		v2 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v2.AuxInt = 0
+		v1.AddArg(v2)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVWstore, types.TypeMem)
+		v3.AuxInt = 8
+		v3.AddArg(ptr)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v3.AddArg(v4)
+		v5 := b.NewValue0(v.Pos, OpARM64MOVDstore, types.TypeMem)
+		v5.AddArg(ptr)
+		v6 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v6.AuxInt = 0
+		v5.AddArg(v6)
+		v5.AddArg(mem)
+		v3.AddArg(v5)
+		v1.AddArg(v3)
+		v.AddArg(v1)
+		return true
+	}
+	// match: (Zero [16] ptr mem)
+	// cond:
+	// result: (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
+	for {
+		if v.AuxInt != 16 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64STP)
+		v.AuxInt = 0
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Zero [32] ptr mem)
+	// cond:
+	// result: (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
+	for {
+		if v.AuxInt != 32 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64STP)
+		v.AuxInt = 16
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v2.AuxInt = 0
+		v2.AddArg(ptr)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v3.AuxInt = 0
+		v2.AddArg(v3)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v2.AddArg(v4)
+		v2.AddArg(mem)
+		v.AddArg(v2)
+		return true
+	}
+	// match: (Zero [48] ptr mem)
+	// cond:
+	// result: (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
+	for {
+		if v.AuxInt != 48 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64STP)
+		v.AuxInt = 32
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v2.AuxInt = 16
+		v2.AddArg(ptr)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v3.AuxInt = 0
+		v2.AddArg(v3)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v2.AddArg(v4)
+		v5 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v5.AuxInt = 0
+		v5.AddArg(ptr)
+		v6 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v6.AuxInt = 0
+		v5.AddArg(v6)
+		v7 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v7.AuxInt = 0
+		v5.AddArg(v7)
+		v5.AddArg(mem)
+		v2.AddArg(v5)
+		v.AddArg(v2)
+		return true
+	}
+	// match: (Zero [64] ptr mem)
+	// cond:
+	// result: (STP [48] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
+	for {
+		if v.AuxInt != 64 {
+			break
+		}
+		_ = v.Args[1]
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64STP)
+		v.AuxInt = 48
+		v.AddArg(ptr)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v2.AuxInt = 32
+		v2.AddArg(ptr)
+		v3 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v3.AuxInt = 0
+		v2.AddArg(v3)
+		v4 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v4.AuxInt = 0
+		v2.AddArg(v4)
+		v5 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v5.AuxInt = 16
+		v5.AddArg(ptr)
+		v6 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v6.AuxInt = 0
+		v5.AddArg(v6)
+		v7 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v7.AuxInt = 0
+		v5.AddArg(v7)
+		v8 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
+		v8.AuxInt = 0
+		v8.AddArg(ptr)
+		v9 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v9.AuxInt = 0
+		v8.AddArg(v9)
+		v10 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v10.AuxInt = 0
+		v8.AddArg(v10)
+		v8.AddArg(mem)
+		v5.AddArg(v8)
+		v2.AddArg(v5)
+		v.AddArg(v2)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpZero_20(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
 	// match: (Zero [s] ptr mem)
-	// cond: s%8 != 0 && s > 8
-	// result: (Zero [s%8] (OffPtr <ptr.Type> ptr [s-s%8]) (Zero [s-s%8] ptr mem))
+	// cond: s%16 != 0 && s > 16
+	// result: (Zero [s-s%16] (OffPtr <ptr.Type> ptr [s%16]) (Zero [s%16] ptr mem))
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%8 != 0 && s > 8) {
+		if !(s%16 != 0 && s > 16) {
 			break
 		}
 		v.reset(OpZero)
-		v.AuxInt = s % 8
+		v.AuxInt = s - s%16
 		v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
-		v0.AuxInt = s - s%8
+		v0.AuxInt = s % 16
 		v0.AddArg(ptr)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
-		v1.AuxInt = s - s%8
+		v1.AuxInt = s % 16
 		v1.AddArg(ptr)
 		v1.AddArg(mem)
 		v.AddArg(v1)
 		return true
 	}
 	// match: (Zero [s] ptr mem)
-	// cond: s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice
-	// result: (DUFFZERO [4 * (128 - int64(s/8))] ptr mem)
+	// cond: s%16 == 0 && s > 64 && s <= 16*64 && !config.noDuffDevice
+	// result: (DUFFZERO [4 * (64 - int64(s/16))] ptr mem)
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%8 == 0 && s > 24 && s <= 8*128 && !config.noDuffDevice) {
+		if !(s%16 == 0 && s > 64 && s <= 16*64 && !config.noDuffDevice) {
 			break
 		}
 		v.reset(OpARM64DUFFZERO)
-		v.AuxInt = 4 * (128 - int64(s/8))
+		v.AuxInt = 4 * (64 - int64(s/16))
 		v.AddArg(ptr)
 		v.AddArg(mem)
 		return true
 	}
 	// match: (Zero [s] ptr mem)
-	// cond: s%8 == 0 && (s > 8*128 || config.noDuffDevice)
-	// result: (LoweredZero ptr (ADDconst <ptr.Type> [s-8] ptr) mem)
+	// cond: s%16 == 0 && (s > 16*64 || config.noDuffDevice)
+	// result: (LoweredZero ptr (ADDconst <ptr.Type> [s-16] ptr) mem)
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%8 == 0 && (s > 8*128 || config.noDuffDevice)) {
+		if !(s%16 == 0 && (s > 16*64 || config.noDuffDevice)) {
 			break
 		}
 		v.reset(OpARM64LoweredZero)
 		v.AddArg(ptr)
 		v0 := b.NewValue0(v.Pos, OpARM64ADDconst, ptr.Type)
-		v0.AuxInt = s - 8
+		v0.AuxInt = s - 16
 		v0.AddArg(ptr)
 		v.AddArg(v0)
 		v.AddArg(mem)

--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go

@@ -291,8 +291,10 @@ const (
 	C_NPAUTO   // -512 <= x < 0, 0 mod 8
 	C_NSAUTO   // -256 <= x < 0
+	C_PSAUTO_8 // 0 to 255, 0 mod 8
 	C_PSAUTO   // 0 to 255
-	C_PPAUTO   // 0 to 504, 0 mod 8
+	C_PPAUTO_8 // 0 to 504, 0 mod 8
+	C_PPAUTO   // 0 to 504
 	C_UAUTO4K_8 // 0 to 4095, 0 mod 8
 	C_UAUTO4K_4 // 0 to 4095, 0 mod 4
 	C_UAUTO4K_2 // 0 to 4095, 0 mod 2
@@ -315,7 +317,9 @@ const (
 	C_ZOREG  // 0(R)
 	C_NPOREG // must mirror NPAUTO, etc
 	C_NSOREG
+	C_PSOREG_8
 	C_PSOREG
+	C_PPOREG_8
 	C_PPOREG
 	C_UOREG4K_8
 	C_UOREG4K_4

--- a/src/cmd/internal/obj/arm64/anames7.go
+++ b/src/cmd/internal/obj/arm64/anames7.go

@@ -35,7 +35,9 @@ var cnames7 = []string{
 	"LBRA",
 	"NPAUTO",
 	"NSAUTO",
+	"PSAUTO_8",
 	"PSAUTO",
+	"PPAUTO_8",
 	"PPAUTO",
 	"UAUTO4K_8",
 	"UAUTO4K_4",
@@ -57,7 +59,9 @@ var cnames7 = []string{
 	"ZOREG",
 	"NPOREG",
 	"NSOREG",
+	"PSOREG_8",
 	"PSOREG",
+	"PPOREG_8",
 	"PPOREG",
 	"UOREG4K_8",
 	"UOREG4K_4",

--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go

@@ -427,12 +427,57 @@ var optab = []Optab{
 	{AFMOVS, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE},
 	{AFMOVD, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE},

-	/* pre/post-indexed load/store register pair
-	   (unscaled, signed 10-bit quad-aligned offset) */
-	{ALDP, C_LOREG, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
-	{ALDP, C_LOREG, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
-	{ASTP, C_PAIR, C_NONE, C_LOREG, 67, 4, 0, 0, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_LOREG, 67, 4, 0, 0, C_XPOST},
+	/* pre/post-indexed/signed-offset load/store register pair
+	   (unscaled, signed 10-bit quad-aligned and long offset) */
+	{ALDP, C_NPAUTO, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0},
+	{ALDP, C_NPAUTO, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
+	{ALDP, C_NPAUTO, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
+	{ALDP, C_PPAUTO_8, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0},
+	{ALDP, C_PPAUTO_8, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
+	{ALDP, C_PPAUTO_8, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
+	{ALDP, C_UAUTO4K, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
+	{ALDP, C_UAUTO4K, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
+	{ALDP, C_UAUTO4K, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPOST},
+	{ALDP, C_LAUTO, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0},
+	{ALDP, C_LAUTO, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPRE},
+	{ALDP, C_LAUTO, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPOST},
+	{ALDP, C_NPOREG, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
+	{ALDP, C_NPOREG, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
+	{ALDP, C_NPOREG, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
+	{ALDP, C_PPOREG_8, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
+	{ALDP, C_PPOREG_8, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
+	{ALDP, C_PPOREG_8, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
+	{ALDP, C_UOREG4K, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
+	{ALDP, C_UOREG4K, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPRE},
+	{ALDP, C_UOREG4K, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPOST},
+	{ALDP, C_LOREG, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0},
+	{ALDP, C_LOREG, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPRE},
+	{ALDP, C_LOREG, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_PPAUTO_8, 67, 4, REGSP, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_PPAUTO_8, 67, 4, REGSP, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_PPAUTO_8, 67, 4, REGSP, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0},
+	{ASTP, C_PAIR, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_NPOREG, 67, 4, 0, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_PPOREG_8, 67, 4, 0, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_PPOREG_8, 67, 4, 0, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_PPOREG_8, 67, 4, 0, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0},
+	{ASTP, C_PAIR, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPOST},
+	{ASTP, C_PAIR, C_NONE, C_LOREG, 77, 12, 0, LTO, 0},
+	{ASTP, C_PAIR, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPRE},
+	{ASTP, C_PAIR, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST},

 	/* special */
 	{AMOVD, C_SPR, C_NONE, C_REG, 35, 4, 0, 0, 0},
@@ -761,7 +806,9 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
 		fallthrough

 	case C_PSAUTO,
+		C_PSAUTO_8,
 		C_PPAUTO,
+		C_PPAUTO_8,
 		C_UAUTO4K_8,
 		C_UAUTO4K_4,
 		C_UAUTO4K_2,
@@ -776,7 +823,9 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
 		C_NPAUTO,
 		C_LAUTO,
 		C_PPOREG,
+		C_PPOREG_8,
 		C_PSOREG,
+		C_PSOREG_8,
 		C_UOREG4K_8,
 		C_UOREG4K_4,
 		C_UOREG4K_2,
@@ -997,9 +1046,15 @@ func autoclass(l int64) int {
 	}
 	if l <= 255 {
+		if (l & 7) == 0 {
+			return C_PSAUTO_8
+		}
 		return C_PSAUTO
 	}
-	if l <= 504 && (l&7) == 0 {
+	if l <= 504 {
+		if (l & 7) == 0 {
+			return C_PPAUTO_8
+		}
 		return C_PPAUTO
 	}
 	if l <= 4095 {
@@ -1396,32 +1451,42 @@ func cmp(a int, b int) bool {
 			return true
 		}

+	case C_PSAUTO:
+		if b == C_PSAUTO_8 {
+			return true
+		}
+
 	case C_PPAUTO:
-		if b == C_PSAUTO {
+		if b == C_PSAUTO || b == C_PSAUTO_8 {
+			return true
+		}
+
+	case C_PPAUTO_8:
+		if b == C_PSAUTO_8 {
 			return true
 		}

 	case C_UAUTO4K:
 		switch b {
-		case C_PSAUTO, C_PPAUTO, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8:
+		case C_PSAUTO, C_PSAUTO_8, C_PPAUTO, C_PPAUTO_8, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8:
 			return true
 		}

 	case C_UAUTO8K:
 		switch b {
-		case C_PSAUTO, C_PPAUTO, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8:
+		case C_PSAUTO, C_PSAUTO_8, C_PPAUTO, C_PPAUTO_8, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8:
 			return true
 		}

 	case C_UAUTO16K:
 		switch b {
-		case C_PSAUTO, C_PPAUTO, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO16K_8:
+		case C_PSAUTO, C_PSAUTO_8, C_PPAUTO, C_PPAUTO_8, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO16K_8:
 			return true
 		}

 	case C_UAUTO32K:
 		switch b {
-		case C_PSAUTO, C_PPAUTO, C_UAUTO4K_8, C_UAUTO8K_8, C_UAUTO16K_8:
+		case C_PSAUTO, C_PSAUTO_8, C_PPAUTO, C_PPAUTO_8, C_UAUTO4K_8, C_UAUTO8K_8, C_UAUTO16K_8:
 			return true
 		}
@@ -1430,7 +1495,7 @@ func cmp(a int, b int) bool {
 	case C_LAUTO:
 		switch b {
-		case C_PSAUTO, C_PPAUTO,
+		case C_PSAUTO, C_PSAUTO_8, C_PPAUTO, C_PPAUTO_8,
 			C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8,
 			C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8,
 			C_UAUTO16K, C_UAUTO16K_8,
@@ -1440,36 +1505,42 @@ func cmp(a int, b int) bool {
 		return cmp(C_NPAUTO, b)

 	case C_PSOREG:
-		if b == C_ZOREG {
+		if b == C_ZOREG || b == C_PSOREG_8 {
 			return true
 		}

 	case C_PPOREG:
-		if b == C_ZOREG || b == C_PSOREG {
+		switch b {
+		case C_ZOREG, C_PSOREG, C_PSOREG_8, C_PPOREG_8:
+			return true
+		}
+
+	case C_PPOREG_8:
+		if b == C_ZOREG || b == C_PSOREG_8 {
 			return true
 		}

 	case C_UOREG4K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8:
+		case C_ZOREG, C_PSOREG_8, C_PSOREG, C_PPOREG_8, C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8:
 			return true
 		}

 	case C_UOREG8K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, C_UOREG8K_8:
+		case C_ZOREG, C_PSOREG_8, C_PSOREG, C_PPOREG_8, C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, C_UOREG8K_8:
 			return true
 		}

 	case C_UOREG16K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PPOREG, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, C_UOREG8K_8, C_UOREG16K_8:
+		case C_ZOREG, C_PSOREG_8, C_PSOREG, C_PPOREG_8, C_PPOREG, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, C_UOREG8K_8, C_UOREG16K_8:
 			return true
 		}

 	case C_UOREG32K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PPOREG, C_UOREG4K_8, C_UOREG8K_8, C_UOREG16K_8:
+		case C_ZOREG, C_PSOREG_8, C_PSOREG, C_PPOREG_8, C_PPOREG, C_UOREG4K_8, C_UOREG8K_8, C_UOREG16K_8:
 			return true
 		}
@@ -1478,7 +1549,7 @@ func cmp(a int, b int) bool {
 	case C_LOREG:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PPOREG,
+		case C_ZOREG, C_PSOREG_8, C_PSOREG, C_PPOREG_8, C_PPOREG,
 			C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8,
 			C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8,
 			C_UOREG16K, C_UOREG16K_8,
@@ -2605,7 +2676,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 			c.ctxt.Diag("illegal bit position\n%v", p)
 		}
 		if ((d >> uint(s*16)) >> 16) != 0 {
-			c.ctxt.Diag("requires uimm16\n%v",p)
+			c.ctxt.Diag("requires uimm16\n%v", p)
 		}
 		rt := int(p.To.Reg)
@@ -2998,31 +3069,50 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		o3 = c.olsr12u(p, int32(c.opldr12(p, p.As)), 0, REGTMP, int(p.To.Reg))

 	case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */
-		v := int32(p.From.Offset)
-		if v < -512 || v > 504 {
-			c.ctxt.Diag("offset out of range\n%v", p)
+		v := int32(c.regoff(&p.From))
+		r := int(p.From.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid ldp source: %v\n", p)
+		}
+		if v < -512 || v > 504 || v%8 != 0 {
+			c.ctxt.Diag("invalid offset %v\n", p)
 		}
 		if o.scond == C_XPOST {
 			o1 |= 1 << 23
-		} else {
+		} else if o.scond == C_XPRE {
 			o1 |= 3 << 23
+		} else {
+			o1 |= 2 << 23
 		}
 		o1 |= 1 << 22
-		o1 |= uint32(int64(2<<30|5<<27|((uint32(v)/8)&0x7f)<<15) | p.To.Offset<<10 | int64(uint32(p.From.Reg&31)<<5) | int64(p.To.Reg&31))
+		o1 |= uint32(int64(2<<30|5<<27|((uint32(v)/8)&0x7f)<<15) | (p.To.Offset&31)<<10 | int64(uint32(r&31)<<5) | int64(p.To.Reg&31))

 	case 67: /* stp (r1, r2), O(R)!; stp (r1, r2), (R)O! */
-		v := int32(p.To.Offset)
-		if v < -512 || v > 504 {
-			c.ctxt.Diag("offset out of range\n%v", p)
+		r := int(p.To.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid stp destination: %v\n", p)
+		}
+		v := int32(c.regoff(&p.To))
+		if v < -512 || v > 504 || v%8 != 0 {
+			c.ctxt.Diag("invalid offset %v\n", p)
 		}
 		if o.scond == C_XPOST {
 			o1 |= 1 << 23
-		} else {
+		} else if o.scond == C_XPRE {
 			o1 |= 3 << 23
+		} else {
+			o1 |= 2 << 23
 		}
-		o1 |= uint32(int64(2<<30|5<<27|((uint32(v)/8)&0x7f)<<15) | p.From.Offset<<10 | int64(uint32(p.To.Reg&31)<<5) | int64(p.From.Reg&31))
+		o1 |= uint32(int64(2<<30|5<<27|((uint32(v)/8)&0x7f)<<15) | (p.From.Offset&31)<<10 | int64(uint32(r&31)<<5) | int64(p.From.Reg&31))

 	case 68: /* movT $vconaddr(SB), reg -> adrp + add + reloc */
 		if p.As == AMOVW {
@@ -3072,6 +3162,114 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		rel.Add = 0
 		rel.Type = objabi.R_ARM64_GOTPCREL

+	case 74:
+		//	add $O, R, Rtmp
+		//	ldp (Rtmp), (R1, R2)
+		r := int(p.From.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid ldp source: %v\n", p)
+		}
+		v := int32(c.regoff(&p.From))
+		if v < 0 || v > 4095 {
+			c.ctxt.Diag("offset out of range%v\n", p)
+		}
+		if o.scond == C_XPOST {
+			o2 |= 1 << 23
+		} else if o.scond == C_XPRE {
+			o2 |= 3 << 23
+		} else {
+			o2 |= 2 << 23
+		}
+		o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP)
+		o2 |= 1 << 22
+		o2 |= uint32(int64(2<<30|5<<27) | (p.To.Offset&31)<<10 | int64(uint32(REGTMP&31)<<5) | int64(p.To.Reg&31))
+
+	case 75:
+		//	mov $L, Rtmp (from constant pool)
+		//	add Rtmp, R, Rtmp
+		//	ldp (Rtmp), (R1, R2)
+		r := int(p.From.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid ldp source: %v\n", p)
+		}
+		if o.scond == C_XPOST {
+			o3 |= 1 << 23
+		} else if o.scond == C_XPRE {
+			o3 |= 3 << 23
+		} else {
+			o3 |= 2 << 23
+		}
+		o1 = c.omovlit(AMOVD, p, &p.From, REGTMP)
+		o2 = c.opxrrr(p, AADD)
+		o2 |= (REGTMP & 31) << 16
+		o2 |= uint32(r&31) << 5
+		o2 |= uint32(REGTMP & 31)
+		o3 |= 1 << 22
+		o3 |= uint32(int64(2<<30|5<<27) | (p.To.Offset&31)<<10 | int64(uint32(REGTMP&31)<<5) | int64(p.To.Reg&31))
+
+	case 76:
+		//	add $O, R, Rtmp
+		//	stp (R1, R2), (Rtmp)
+		r := int(p.To.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid stp destination: %v\n", p)
+		}
+		v := int32(c.regoff(&p.To))
+		if v < 0 || v > 4095 {
+			c.ctxt.Diag("offset out of range%v\n", p)
+		}
+		if o.scond == C_XPOST {
+			o2 |= 1 << 23
+		} else if o.scond == C_XPRE {
+			o2 |= 3 << 23
+		} else {
+			o2 |= 2 << 23
+		}
+		o1 = c.oaddi(p, int32(c.opirr(p, AADD)), v, r, REGTMP)
+		o2 |= uint32(int64(2<<30|5<<27) | (p.From.Offset&31)<<10 | int64(uint32(REGTMP&31)<<5) | int64(p.From.Reg&31))
+
+	case 77:
+		//	mov $L, Rtmp (from constant pool)
+		//	add Rtmp, R, Rtmp
+		//	stp (R1, R2), (Rtmp)
+		r := int(p.To.Reg)
+		if r == obj.REG_NONE {
+			r = int(o.param)
+		}
+		if r == obj.REG_NONE {
+			c.ctxt.Diag("invalid stp destination: %v\n", p)
+		}
+		if o.scond == C_XPOST {
+			o3 |= 1 << 23
+		} else if o.scond == C_XPRE {
+			o3 |= 3 << 23
+		} else {
+			o3 |= 2 << 23
+		}
+		o1 = c.omovlit(AMOVD, p, &p.To, REGTMP)
+		o2 = c.opxrrr(p, AADD)
+		o2 |= REGTMP & 31 << 16
+		o2 |= uint32(r&31) << 5
+		o2 |= uint32(REGTMP & 31)
+		o3 |= uint32(int64(2<<30|5<<27) | (p.From.Offset&31)<<10 | int64(uint32(REGTMP&31)<<5) | int64(p.From.Reg&31))
+
 	// This is supposed to be something that stops execution.
 	// It's not supposed to be reached, ever, but if it is, we'd
 	// like to be able to tell how we got there. Assemble as

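The new operand classes split the small positive auto/indirect offsets by 8-byte alignment, since LDP/STP require quad-aligned immediates. A compressed Go model of the positive-offset half of autoclass above (class names copied from the diff, logic simplified for illustration):

	package main

	import "fmt"

	// classify mimics the positive small-offset portion of autoclass with
	// the new 8-byte-aligned classes; larger offsets fall into the
	// C_UAUTO4K/C_LAUTO families and are handled by optab cases 74-77.
	func classify(l int64) string {
		switch {
		case l <= 255:
			if l&7 == 0 {
				return "C_PSAUTO_8"
			}
			return "C_PSAUTO"
		case l <= 504:
			if l&7 == 0 {
				return "C_PPAUTO_8"
			}
			return "C_PPAUTO"
		default:
			return "larger classes"
		}
	}

	func main() {
		fmt.Println(classify(248)) // C_PSAUTO_8: usable by a single STP
		fmt.Println(classify(249)) // C_PSAUTO: unaligned, not valid for STP
		fmt.Println(classify(504)) // C_PPAUTO_8
	}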
--- a/src/runtime/duff_arm64.s
+++ b/src/runtime/duff_arm64.s

@@ -5,134 +5,70 @@
 #include "textflag.h"

 TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
-	MOVD.W	ZR, 8(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP.P	(ZR, ZR), 16(R16)
+	STP	(ZR, ZR), (R16)
 	RET

 TEXT runtime·duffcopy(SB), NOSPLIT, $0-0

--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go

@@ -151,12 +151,13 @@ func copyARM(w io.Writer) {
 func zeroARM64(w io.Writer) {
 	// ZR: always zero
-	// R16 (aka REGRT1): ptr to memory to be zeroed - 8
+	// R16 (aka REGRT1): ptr to memory to be zeroed
 	// On return, R16 points to the last zeroed dword.
 	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
-	for i := 0; i < 128; i++ {
-		fmt.Fprintln(w, "\tMOVD.W\tZR, 8(R16)")
+	for i := 0; i < 63; i++ {
+		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
 	}
+	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
 	fmt.Fprintln(w, "\tRET")
 }
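
The generated duffzero thus contains 63 post-incrementing STP.P stores plus one final plain STP: 64 pairs in all, clearing at most 16*64 = 1024 bytes. That is why the Duff window in the rules above is s <= 16*64, and why BenchmarkClearFat1024 is the largest size that still avoids the loop. A quick sanity check of the generator's output (reusing the zeroARM64 shown in the diff above):

	package main

	import (
		"bytes"
		"fmt"
		"io"
		"strings"
	)

	// zeroARM64 mirrors the generator in mkduff.go above.
	func zeroARM64(w io.Writer) {
		fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
		for i := 0; i < 63; i++ {
			fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
		}
		fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
		fmt.Fprintln(w, "\tRET")
	}

	func main() {
		var buf bytes.Buffer
		zeroARM64(&buf)
		stores := strings.Count(buf.String(), "STP")
		fmt.Println(stores, "stores,", stores*16, "bytes cleared") // 64 stores, 1024 bytes cleared
	}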