2018-03-29 00:55:53 +02:00
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package wasm
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"cmd/internal/obj"
|
|
|
|
|
"cmd/internal/objabi"
|
|
|
|
|
"cmd/internal/sys"
|
|
|
|
|
"encoding/binary"
|
|
|
|
|
"fmt"
|
2023-04-19 13:21:02 -04:00
|
|
|
"internal/abi"
|
2018-03-29 00:55:53 +02:00
|
|
|
"io"
|
|
|
|
|
"math"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var Register = map[string]int16{
|
2018-10-11 12:46:14 +02:00
|
|
|
"SP": REG_SP,
|
|
|
|
|
"CTXT": REG_CTXT,
|
|
|
|
|
"g": REG_g,
|
|
|
|
|
"RET0": REG_RET0,
|
|
|
|
|
"RET1": REG_RET1,
|
|
|
|
|
"RET2": REG_RET2,
|
|
|
|
|
"RET3": REG_RET3,
|
|
|
|
|
"PAUSE": REG_PAUSE,
|
2018-03-29 00:55:53 +02:00
|
|
|
|
|
|
|
|
"R0": REG_R0,
|
|
|
|
|
"R1": REG_R1,
|
|
|
|
|
"R2": REG_R2,
|
|
|
|
|
"R3": REG_R3,
|
|
|
|
|
"R4": REG_R4,
|
|
|
|
|
"R5": REG_R5,
|
|
|
|
|
"R6": REG_R6,
|
|
|
|
|
"R7": REG_R7,
|
|
|
|
|
"R8": REG_R8,
|
|
|
|
|
"R9": REG_R9,
|
|
|
|
|
"R10": REG_R10,
|
|
|
|
|
"R11": REG_R11,
|
|
|
|
|
"R12": REG_R12,
|
|
|
|
|
"R13": REG_R13,
|
|
|
|
|
"R14": REG_R14,
|
|
|
|
|
"R15": REG_R15,
|
|
|
|
|
|
|
|
|
|
"F0": REG_F0,
|
|
|
|
|
"F1": REG_F1,
|
|
|
|
|
"F2": REG_F2,
|
|
|
|
|
"F3": REG_F3,
|
|
|
|
|
"F4": REG_F4,
|
|
|
|
|
"F5": REG_F5,
|
|
|
|
|
"F6": REG_F6,
|
|
|
|
|
"F7": REG_F7,
|
|
|
|
|
"F8": REG_F8,
|
|
|
|
|
"F9": REG_F9,
|
|
|
|
|
"F10": REG_F10,
|
|
|
|
|
"F11": REG_F11,
|
|
|
|
|
"F12": REG_F12,
|
|
|
|
|
"F13": REG_F13,
|
|
|
|
|
"F14": REG_F14,
|
|
|
|
|
"F15": REG_F15,
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo stores
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less entensively as SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2019-09-12 21:05:45 +02:00
|
|
|
"F16": REG_F16,
|
|
|
|
|
"F17": REG_F17,
|
|
|
|
|
"F18": REG_F18,
|
|
|
|
|
"F19": REG_F19,
|
|
|
|
|
"F20": REG_F20,
|
|
|
|
|
"F21": REG_F21,
|
|
|
|
|
"F22": REG_F22,
|
|
|
|
|
"F23": REG_F23,
|
|
|
|
|
"F24": REG_F24,
|
|
|
|
|
"F25": REG_F25,
|
|
|
|
|
"F26": REG_F26,
|
|
|
|
|
"F27": REG_F27,
|
|
|
|
|
"F28": REG_F28,
|
|
|
|
|
"F29": REG_F29,
|
|
|
|
|
"F30": REG_F30,
|
|
|
|
|
"F31": REG_F31,
|
|
|
|
|
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo stores
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less entensively as SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
"PC_B": REG_PC_B,
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// registerNames is the inverse of Register: it is indexed by
// (register number - MINREG) and holds the register's assembly name.
// It is filled in by init and read by rconv.
var registerNames []string
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
obj.RegisterRegister(MINREG, MAXREG, rconv)
|
|
|
|
|
obj.RegisterOpcode(obj.ABaseWasm, Anames)
|
|
|
|
|
|
|
|
|
|
registerNames = make([]string, MAXREG-MINREG)
|
|
|
|
|
for name, reg := range Register {
|
|
|
|
|
registerNames[reg-MINREG] = name
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rconv(r int) string {
|
|
|
|
|
return registerNames[r-MINREG]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var unaryDst = map[obj.As]bool{
|
|
|
|
|
ASet: true,
|
|
|
|
|
ATee: true,
|
|
|
|
|
ACall: true,
|
|
|
|
|
ACallIndirect: true,
|
|
|
|
|
ABr: true,
|
|
|
|
|
ABrIf: true,
|
|
|
|
|
ABrTable: true,
|
|
|
|
|
AI32Store: true,
|
|
|
|
|
AI64Store: true,
|
|
|
|
|
AF32Store: true,
|
|
|
|
|
AF64Store: true,
|
|
|
|
|
AI32Store8: true,
|
|
|
|
|
AI32Store16: true,
|
|
|
|
|
AI64Store8: true,
|
|
|
|
|
AI64Store16: true,
|
|
|
|
|
AI64Store32: true,
|
|
|
|
|
ACALLNORESUME: true,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var Linkwasm = obj.LinkArch{
|
|
|
|
|
Arch: sys.ArchWasm,
|
|
|
|
|
Init: instinit,
|
|
|
|
|
Preprocess: preprocess,
|
|
|
|
|
Assemble: assemble,
|
|
|
|
|
UnaryDst: unaryDst,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var (
|
|
|
|
|
morestack *obj.LSym
|
|
|
|
|
morestackNoCtxt *obj.LSym
|
|
|
|
|
sigpanic *obj.LSym
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Mark flags, stored in obj.Prog.Mark.
const (
	// WasmImport marks a call instruction that targets a wasm import.
	WasmImport = 1 << 0
)
|
|
|
|
|
|
2023-01-22 15:30:59 -08:00
|
|
|
const (
	// GojsModule is a special wasm module name. When it is used as the
	// module name in a //go:wasmimport directive, the generated wrapper
	// passes the stack pointer directly to the imported function. In
	// other words, any function imported from the gojs module is
	// expected to understand the internal Go WASM ABI itself.
	GojsModule = "gojs"
)
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
func instinit(ctxt *obj.Link) {
|
|
|
|
|
morestack = ctxt.Lookup("runtime.morestack")
|
|
|
|
|
morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt")
|
cmd/compile: separate data and function LSyms
Currently, obj.Ctxt's symbol table does not distinguish between ABI0
and ABIInternal symbols. This is *almost* okay, since a given symbol
name in the final object file is only going to belong to one ABI or
the other, but it requires that the compiler mark a Sym as being a
function symbol before it retrieves its LSym. If it retrieves the LSym
first, that LSym will be created as ABI0, and later marking the Sym as
a function symbol won't change the LSym's ABI.
Marking a Sym as a function symbol before looking up its LSym sounds
easy, except Syms have a dual purpose: they are used just as interned
strings (every function, variable, parameter, etc with the same
textual name shares a Sym), and *also* to store state for whatever
package global has that name. As a result, it's easy to slip up and
look up an LSym when a Sym is serving as the name of a local variable,
and then later mark it as a function when it's serving as the global
with the name.
In general, we were careful to avoid this, but #29610 demonstrates one
case where we messed up. Because of on-demand importing from indexed
export data, it's possible to compile a method wrapper for a type
imported from another package before importing an init function from
that package. If the argument of the method is named "init", the
"init" LSym will be created as a data symbol when compiling the
wrapper, before it gets marked as a function symbol.
To fix this, we separate obj.Ctxt's symbol tables for ABI0 and
ABIInternal symbols. This way, the compiler will simply get a
different LSym once the Sym takes on its package-global meaning as a
function.
This fixes the above ordering issue, and means we no longer need to go
out of our way to create the "init" function early and mark it as a
function symbol.
Fixes #29610.
Updates #27539.
Change-Id: Id9458b40017893d46ef9e4a3f9b47fc49e1ce8df
Reviewed-on: https://go-review.googlesource.com/c/157017
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2019-01-08 22:23:52 -05:00
|
|
|
sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|
|
|
|
appendp := func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog {
|
|
|
|
|
if p.As != obj.ANOP {
|
|
|
|
|
p2 := obj.Appendp(p, newprog)
|
|
|
|
|
p2.Pc = p.Pc
|
|
|
|
|
p = p2
|
|
|
|
|
}
|
|
|
|
|
p.As = as
|
|
|
|
|
switch len(args) {
|
|
|
|
|
case 0:
|
|
|
|
|
p.From = obj.Addr{}
|
|
|
|
|
p.To = obj.Addr{}
|
|
|
|
|
case 1:
|
|
|
|
|
if unaryDst[as] {
|
|
|
|
|
p.From = obj.Addr{}
|
|
|
|
|
p.To = args[0]
|
|
|
|
|
} else {
|
|
|
|
|
p.From = args[0]
|
|
|
|
|
p.To = obj.Addr{}
|
|
|
|
|
}
|
|
|
|
|
case 2:
|
|
|
|
|
p.From = args[0]
|
|
|
|
|
p.To = args[1]
|
|
|
|
|
default:
|
|
|
|
|
panic("bad args")
|
|
|
|
|
}
|
|
|
|
|
return p
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-19 00:30:12 -04:00
|
|
|
framesize := s.Func().Text.To.Offset
|
2018-03-29 00:55:53 +02:00
|
|
|
if framesize < 0 {
|
|
|
|
|
panic("bad framesize")
|
|
|
|
|
}
|
2020-07-19 00:30:12 -04:00
|
|
|
s.Func().Args = s.Func().Text.To.Val.(int32)
|
|
|
|
|
s.Func().Locals = int32(framesize)
|
2018-03-29 00:55:53 +02:00
|
|
|
|
2023-01-22 15:30:59 -08:00
|
|
|
// If the function exists just to call out to a wasmimport, then
|
|
|
|
|
// generate the code to translate from our internal Go-stack
|
|
|
|
|
// based call convention to the native webassembly call convention.
|
|
|
|
|
if wi := s.Func().WasmImport; wi != nil {
|
|
|
|
|
s.Func().WasmImportSym = wi.CreateSym(ctxt)
|
|
|
|
|
p := s.Func().Text
|
|
|
|
|
if p.Link != nil {
|
|
|
|
|
panic("wrapper functions for WASM imports should not have a body")
|
|
|
|
|
}
|
|
|
|
|
to := obj.Addr{
|
|
|
|
|
Type: obj.TYPE_MEM,
|
|
|
|
|
Name: obj.NAME_EXTERN,
|
|
|
|
|
Sym: s,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If the module that the import is for is our magic "gojs" module, then this
|
|
|
|
|
// indicates that the called function understands the Go stack-based call convention
|
|
|
|
|
// so we just pass the stack pointer to it, knowing it will read the params directly
|
|
|
|
|
// off the stack and push the results into memory based on the stack pointer.
|
|
|
|
|
if wi.Module == GojsModule {
|
|
|
|
|
// The called function has a signature of 'func(sp int)'. It has access to the memory
|
|
|
|
|
// value somewhere to be able to address the memory based on the "sp" value.
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, ACall, to)
|
|
|
|
|
|
|
|
|
|
p.Mark = WasmImport
|
|
|
|
|
} else {
|
|
|
|
|
if len(wi.Results) > 1 {
|
|
|
|
|
// TODO(evanphx) implement support for the multi-value proposal:
|
|
|
|
|
// https://github.com/WebAssembly/multi-value/blob/master/proposals/multi-value/Overview.md
|
|
|
|
|
panic("invalid results type") // impossible until multi-value proposal has landed
|
|
|
|
|
}
|
|
|
|
|
if len(wi.Results) == 1 {
|
|
|
|
|
// If we have a result (rather than returning nothing at all), then
|
|
|
|
|
// we'll write the result to the Go stack relative to the current stack pointer.
|
|
|
|
|
// We cache the current stack pointer value on the wasm stack here and then use
|
|
|
|
|
// it after the Call instruction to store the result.
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
}
|
|
|
|
|
for _, f := range wi.Params {
|
|
|
|
|
// Each load instruction will consume the value of sp on the stack, so
|
|
|
|
|
// we need to read sp for each param. WASM appears to not have a stack dup instruction
|
|
|
|
|
// (a strange omission for a stack-based VM), if it did, we'd be using the dup here.
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
|
|
|
|
|
// Offset is the location of the param on the Go stack (ie relative to sp).
|
|
|
|
|
// Because of our call convention, the parameters are located an additional 8 bytes
|
2023-03-31 00:00:19 +08:00
|
|
|
// from sp because we store the return address as an int64 at the bottom of the stack.
|
2023-01-22 15:30:59 -08:00
|
|
|
// Ie the stack looks like [return_addr, param3, param2, param1, etc]
|
|
|
|
|
|
|
|
|
|
// Ergo, we add 8 to the true byte offset of the param to skip the return address.
|
|
|
|
|
loadOffset := f.Offset + 8
|
|
|
|
|
|
|
|
|
|
// We're reading the value from the Go stack onto the WASM stack and leaving it there
|
|
|
|
|
// for CALL to pick them up.
|
|
|
|
|
switch f.Type {
|
|
|
|
|
case obj.WasmI32:
|
|
|
|
|
p = appendp(p, AI32Load, constAddr(loadOffset))
|
|
|
|
|
case obj.WasmI64:
|
|
|
|
|
p = appendp(p, AI64Load, constAddr(loadOffset))
|
|
|
|
|
case obj.WasmF32:
|
|
|
|
|
p = appendp(p, AF32Load, constAddr(loadOffset))
|
|
|
|
|
case obj.WasmF64:
|
|
|
|
|
p = appendp(p, AF64Load, constAddr(loadOffset))
|
|
|
|
|
case obj.WasmPtr:
|
|
|
|
|
p = appendp(p, AI64Load, constAddr(loadOffset))
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
default:
|
|
|
|
|
panic("bad param type")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The call instruction is marked as being for a wasm import so that a later phase
|
|
|
|
|
// will generate relocation information that allows us to patch this with the
|
|
|
|
|
// offset of the imported function in the wasm imports.
|
|
|
|
|
p = appendp(p, ACall, to)
|
|
|
|
|
p.Mark = WasmImport
|
|
|
|
|
|
|
|
|
|
if len(wi.Results) == 1 {
|
|
|
|
|
f := wi.Results[0]
|
|
|
|
|
|
|
|
|
|
// Much like with the params, we need to adjust the offset we store the result value
|
|
|
|
|
// to by 8 bytes to account for the return address on the Go stack.
|
|
|
|
|
storeOffset := f.Offset + 8
|
|
|
|
|
|
|
|
|
|
// This code is paired with the code above that reads the stack pointer onto the wasm
|
|
|
|
|
// stack. We've done this so we have a consistent view of the sp value as it might
|
|
|
|
|
// be manipulated by the call and we want to ignore that manipulation here.
|
|
|
|
|
switch f.Type {
|
|
|
|
|
case obj.WasmI32:
|
|
|
|
|
p = appendp(p, AI32Store, constAddr(storeOffset))
|
|
|
|
|
case obj.WasmI64:
|
|
|
|
|
p = appendp(p, AI64Store, constAddr(storeOffset))
|
|
|
|
|
case obj.WasmF32:
|
|
|
|
|
p = appendp(p, AF32Store, constAddr(storeOffset))
|
|
|
|
|
case obj.WasmF64:
|
|
|
|
|
p = appendp(p, AF64Store, constAddr(storeOffset))
|
|
|
|
|
case obj.WasmPtr:
|
|
|
|
|
p = appendp(p, AI64ExtendI32U)
|
|
|
|
|
p = appendp(p, AI64Store, constAddr(storeOffset))
|
|
|
|
|
default:
|
|
|
|
|
panic("bad result type")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p = appendp(p, obj.ARET)
|
|
|
|
|
|
|
|
|
|
// It should be 0 already, but we'll set it to 0 anyway just to be sure
|
|
|
|
|
// that the code below which adds frame expansion code to the function body
|
|
|
|
|
// isn't run. We don't want the frame expansion code because our function
|
|
|
|
|
// body is just the code to translate and call the imported function.
|
|
|
|
|
framesize = 0
|
|
|
|
|
} else if s.Func().Text.From.Sym.Wrapper() {
|
2018-03-29 00:55:53 +02:00
|
|
|
// if g._panic != nil && g._panic.argp == FP {
|
|
|
|
|
// g._panic.argp = bottom-of-frame
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// MOVD g_panic(g), R0
|
|
|
|
|
// Get R0
|
|
|
|
|
// I64Eqz
|
|
|
|
|
// Not
|
|
|
|
|
// If
|
|
|
|
|
// Get SP
|
2018-12-12 13:04:44 +01:00
|
|
|
// I64ExtendI32U
|
2018-03-29 00:55:53 +02:00
|
|
|
// I64Const $framesize+8
|
|
|
|
|
// I64Add
|
|
|
|
|
// I64Load panic_argp(R0)
|
|
|
|
|
// I64Eq
|
|
|
|
|
// If
|
|
|
|
|
// MOVD SP, panic_argp(R0)
|
|
|
|
|
// End
|
|
|
|
|
// End
|
|
|
|
|
|
|
|
|
|
gpanic := obj.Addr{
|
|
|
|
|
Type: obj.TYPE_MEM,
|
|
|
|
|
Reg: REGG,
|
|
|
|
|
Offset: 4 * 8, // g_panic
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
panicargp := obj.Addr{
|
|
|
|
|
Type: obj.TYPE_MEM,
|
|
|
|
|
Reg: REG_R0,
|
|
|
|
|
Offset: 0, // panic.argp
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-19 00:30:12 -04:00
|
|
|
p := s.Func().Text
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AMOVD, gpanic, regAddr(REG_R0))
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_R0))
|
|
|
|
|
p = appendp(p, AI64Eqz)
|
|
|
|
|
p = appendp(p, ANot)
|
|
|
|
|
p = appendp(p, AIf)
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
2018-12-12 13:04:44 +01:00
|
|
|
p = appendp(p, AI64ExtendI32U)
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AI64Const, constAddr(framesize+8))
|
|
|
|
|
p = appendp(p, AI64Add)
|
|
|
|
|
p = appendp(p, AI64Load, panicargp)
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AI64Eq)
|
|
|
|
|
p = appendp(p, AIf)
|
|
|
|
|
p = appendp(p, AMOVD, regAddr(REG_SP), panicargp)
|
|
|
|
|
p = appendp(p, AEnd)
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AEnd)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if framesize > 0 {
|
2020-07-19 00:30:12 -04:00
|
|
|
p := s.Func().Text
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(framesize))
|
|
|
|
|
p = appendp(p, AI32Sub)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
p.Spadj = int32(framesize)
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-22 15:30:59 -08:00
|
|
|
// If the framesize is 0, then imply nosplit because it's a specially
|
|
|
|
|
// generated function.
|
|
|
|
|
needMoreStack := framesize > 0 && !s.Func().Text.From.Sym.NoSplit()
|
2019-08-20 17:39:09 -04:00
|
|
|
|
|
|
|
|
// If the maymorestack debug option is enabled, insert the
|
|
|
|
|
// call to maymorestack *before* processing resume points so
|
|
|
|
|
// we can construct a resume point after maymorestack for
|
|
|
|
|
// morestack to resume at.
|
|
|
|
|
var pMorestack = s.Func().Text
|
|
|
|
|
if needMoreStack && ctxt.Flag_maymorestack != "" {
|
|
|
|
|
p := pMorestack
|
|
|
|
|
|
|
|
|
|
// Save REGCTXT on the stack.
|
|
|
|
|
const tempFrame = 8
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(tempFrame))
|
|
|
|
|
p = appendp(p, AI32Sub)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
p.Spadj = tempFrame
|
|
|
|
|
ctxtp := obj.Addr{
|
|
|
|
|
Type: obj.TYPE_MEM,
|
|
|
|
|
Reg: REG_SP,
|
|
|
|
|
Offset: 0,
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, AMOVD, regAddr(REGCTXT), ctxtp)
|
|
|
|
|
|
|
|
|
|
// maymorestack must not itself preempt because we
|
|
|
|
|
// don't have full stack information, so this can be
|
|
|
|
|
// ACALLNORESUME.
|
|
|
|
|
p = appendp(p, ACALLNORESUME, constAddr(0))
|
|
|
|
|
// See ../x86/obj6.go
|
|
|
|
|
sym := ctxt.LookupABI(ctxt.Flag_maymorestack, s.ABI())
|
|
|
|
|
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: sym}
|
|
|
|
|
|
|
|
|
|
// Restore REGCTXT.
|
|
|
|
|
p = appendp(p, AMOVD, ctxtp, regAddr(REGCTXT))
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(tempFrame))
|
|
|
|
|
p = appendp(p, AI32Add)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
p.Spadj = -tempFrame
|
|
|
|
|
|
|
|
|
|
// Add an explicit ARESUMEPOINT after maymorestack for
|
|
|
|
|
// morestack to resume at.
|
|
|
|
|
pMorestack = appendp(p, ARESUMEPOINT)
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
// Introduce resume points for CALL instructions
|
|
|
|
|
// and collect other explicit resume points.
|
|
|
|
|
numResumePoints := 0
|
|
|
|
|
explicitBlockDepth := 0
|
|
|
|
|
pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction
|
|
|
|
|
var tableIdxs []uint64
|
|
|
|
|
tablePC := int64(0)
|
2020-07-19 00:30:12 -04:00
|
|
|
base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base()
|
|
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2018-03-29 00:55:53 +02:00
|
|
|
prevBase := base
|
|
|
|
|
base = ctxt.PosTable.Pos(p.Pos).Base()
|
|
|
|
|
switch p.As {
|
|
|
|
|
case ABlock, ALoop, AIf:
|
|
|
|
|
explicitBlockDepth++
|
|
|
|
|
|
|
|
|
|
case AEnd:
|
|
|
|
|
if explicitBlockDepth == 0 {
|
|
|
|
|
panic("End without block")
|
|
|
|
|
}
|
|
|
|
|
explicitBlockDepth--
|
|
|
|
|
|
|
|
|
|
case ARESUMEPOINT:
|
|
|
|
|
if explicitBlockDepth != 0 {
|
|
|
|
|
panic("RESUME can only be used on toplevel")
|
|
|
|
|
}
|
|
|
|
|
p.As = AEnd
|
|
|
|
|
for tablePC <= pc {
|
|
|
|
|
tableIdxs = append(tableIdxs, uint64(numResumePoints))
|
|
|
|
|
tablePC++
|
|
|
|
|
}
|
|
|
|
|
numResumePoints++
|
|
|
|
|
pc++
|
|
|
|
|
|
|
|
|
|
case obj.ACALL:
|
|
|
|
|
if explicitBlockDepth != 0 {
|
|
|
|
|
panic("CALL can only be used on toplevel, try CALLNORESUME instead")
|
|
|
|
|
}
|
|
|
|
|
appendp(p, ARESUMEPOINT)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p.Pc = pc
|
|
|
|
|
|
|
|
|
|
// Increase pc whenever some pc-value table needs a new entry. Don't increase it
|
|
|
|
|
// more often to avoid bloat of the BrTable instruction.
|
|
|
|
|
// The "base != prevBase" condition detects inlined instructions. They are an
|
|
|
|
|
// implicit call, so entering and leaving this section affects the stack trace.
|
2018-12-04 07:58:18 -08:00
|
|
|
if p.As == ACALLNORESUME || p.As == obj.ANOP || p.As == ANop || p.Spadj != 0 || base != prevBase {
|
2018-03-29 00:55:53 +02:00
|
|
|
pc++
|
2019-01-09 11:21:07 -05:00
|
|
|
if p.To.Sym == sigpanic {
|
|
|
|
|
// The panic stack trace expects the PC at the call of sigpanic,
|
|
|
|
|
// not the next one. However, runtime.Caller subtracts 1 from the
|
|
|
|
|
// PC. To make both PC and PC-1 work (have the same line number),
|
|
|
|
|
// we advance the PC by 2 at sigpanic.
|
|
|
|
|
pc++
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tableIdxs = append(tableIdxs, uint64(numResumePoints))
|
|
|
|
|
s.Size = pc + 1
|
|
|
|
|
|
2019-08-20 17:39:09 -04:00
|
|
|
if needMoreStack {
|
|
|
|
|
p := pMorestack
|
2018-03-29 00:55:53 +02:00
|
|
|
|
2023-04-19 13:21:02 -04:00
|
|
|
if framesize <= abi.StackSmall {
|
2018-03-29 00:55:53 +02:00
|
|
|
// small stack: SP <= stackguard
|
|
|
|
|
// Get SP
|
|
|
|
|
// Get g
|
|
|
|
|
// I32WrapI64
|
|
|
|
|
// I32Load $stackguard0
|
|
|
|
|
// I32LeU
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AGet, regAddr(REGG))
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
|
|
|
|
|
p = appendp(p, AI32LeU)
|
|
|
|
|
} else {
|
|
|
|
|
// large stack: SP-framesize <= stackguard-StackSmall
|
|
|
|
|
// SP <= stackguard+(framesize-StackSmall)
|
|
|
|
|
// Get SP
|
|
|
|
|
// Get g
|
|
|
|
|
// I32WrapI64
|
|
|
|
|
// I32Load $stackguard0
|
|
|
|
|
// I32Const $(framesize-StackSmall)
|
|
|
|
|
// I32Add
|
|
|
|
|
// I32LeU
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AGet, regAddr(REGG))
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
|
2023-04-19 13:21:02 -04:00
|
|
|
p = appendp(p, AI32Const, constAddr(framesize-abi.StackSmall))
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AI32Add)
|
|
|
|
|
p = appendp(p, AI32LeU)
|
|
|
|
|
}
|
|
|
|
|
// TODO(neelance): handle wraparound case
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AIf)
|
2019-08-20 17:39:09 -04:00
|
|
|
// This CALL does *not* have a resume point after it
|
|
|
|
|
// (we already inserted all of the resume points). As
|
|
|
|
|
// a result, morestack will resume at the *previous*
|
|
|
|
|
// resume point (typically, the beginning of the
|
|
|
|
|
// function) and perform the morestack check again.
|
|
|
|
|
// This is why we don't need an explicit loop like
|
|
|
|
|
// other architectures.
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, obj.ACALL, constAddr(0))
|
2020-07-19 00:30:12 -04:00
|
|
|
if s.Func().Text.From.Sym.NeedCtxt() {
|
2018-03-29 00:55:53 +02:00
|
|
|
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack}
|
|
|
|
|
} else {
|
|
|
|
|
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt}
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, AEnd)
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-17 13:45:46 +01:00
|
|
|
// record the branches targeting the entry loop and the unwind exit,
|
|
|
|
|
// their targets will be filled in later
|
|
|
|
|
var entryPointLoopBranches []*obj.Prog
|
|
|
|
|
var unwindExitBranches []*obj.Prog
|
2018-03-29 00:55:53 +02:00
|
|
|
currentDepth := 0
|
2020-07-19 00:30:12 -04:00
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2018-03-29 00:55:53 +02:00
|
|
|
switch p.As {
|
|
|
|
|
case ABlock, ALoop, AIf:
|
|
|
|
|
currentDepth++
|
|
|
|
|
case AEnd:
|
|
|
|
|
currentDepth--
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch p.As {
|
|
|
|
|
case obj.AJMP:
|
|
|
|
|
jmp := *p
|
|
|
|
|
p.As = obj.ANOP
|
|
|
|
|
|
|
|
|
|
if jmp.To.Type == obj.TYPE_BRANCH {
|
|
|
|
|
// jump to basic block
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(jmp.To.Val.(*obj.Prog).Pc))
|
2019-03-17 13:45:46 +01:00
|
|
|
p = appendp(p, ASet, regAddr(REG_PC_B)) // write next basic block to PC_B
|
|
|
|
|
p = appendp(p, ABr) // jump to beginning of entryPointLoop
|
|
|
|
|
entryPointLoopBranches = append(entryPointLoopBranches, p)
|
2018-03-29 00:55:53 +02:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// low-level WebAssembly call to function
|
|
|
|
|
switch jmp.To.Type {
|
|
|
|
|
case obj.TYPE_MEM:
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
if !notUsePC_B[jmp.To.Sym.Name] {
|
|
|
|
|
// Set PC_B parameter to function entry.
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, ACall, jmp.To)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
case obj.TYPE_NONE:
|
|
|
|
|
// (target PC is on stack)
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
|
|
|
|
p = appendp(p, AI32ShrU)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
|
|
|
|
// Set PC_B parameter to function entry.
|
|
|
|
|
// We need to push this before pushing the target PC_F,
|
|
|
|
|
// so temporarily pop PC_F, using our REG_PC_B as a
|
|
|
|
|
// scratch register, and push it back after pushing 0.
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_PC_B))
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, ACallIndirect)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
default:
|
|
|
|
|
panic("bad target for JMP")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p = appendp(p, AReturn)
|
|
|
|
|
|
|
|
|
|
case obj.ACALL, ACALLNORESUME:
|
|
|
|
|
call := *p
|
|
|
|
|
p.As = obj.ANOP
|
|
|
|
|
|
|
|
|
|
pcAfterCall := call.Link.Pc
|
|
|
|
|
if call.To.Sym == sigpanic {
|
|
|
|
|
pcAfterCall-- // sigpanic expects to be called without advancing the pc
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SP -= 8
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(8))
|
|
|
|
|
p = appendp(p, AI32Sub)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
|
|
|
|
|
// write return address to Go stack
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI64Const, obj.Addr{
|
|
|
|
|
Type: obj.TYPE_ADDR,
|
|
|
|
|
Name: obj.NAME_EXTERN,
|
|
|
|
|
Sym: s, // PC_F
|
|
|
|
|
Offset: pcAfterCall, // PC_B
|
|
|
|
|
})
|
|
|
|
|
p = appendp(p, AI64Store, constAddr(0))
|
|
|
|
|
|
|
|
|
|
// low-level WebAssembly call to function
|
|
|
|
|
switch call.To.Type {
|
|
|
|
|
case obj.TYPE_MEM:
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
if !notUsePC_B[call.To.Sym.Name] {
|
|
|
|
|
// Set PC_B parameter to function entry.
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, ACall, call.To)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
case obj.TYPE_NONE:
|
|
|
|
|
// (target PC is on stack)
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
|
|
|
|
p = appendp(p, AI32ShrU)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
|
|
|
|
// Set PC_B parameter to function entry.
|
|
|
|
|
// We need to push this before pushing the target PC_F,
|
|
|
|
|
// so temporarily pop PC_F, using our PC_B as a
|
|
|
|
|
// scratch register, and push it back after pushing 0.
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_PC_B))
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, ACallIndirect)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
default:
|
|
|
|
|
panic("bad target for CALL")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack
|
2020-10-14 08:36:11 -04:00
|
|
|
if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes
|
2018-03-29 00:55:53 +02:00
|
|
|
// trying to unwind WebAssembly stack but call has no resume point, terminate with error
|
2019-03-17 13:45:46 +01:00
|
|
|
p = appendp(p, AIf)
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, obj.AUNDEF)
|
2019-03-17 13:45:46 +01:00
|
|
|
p = appendp(p, AEnd)
|
2018-03-29 00:55:53 +02:00
|
|
|
} else {
|
|
|
|
|
// unwinding WebAssembly stack to switch goroutine, return 1
|
2019-03-17 13:45:46 +01:00
|
|
|
p = appendp(p, ABrIf)
|
|
|
|
|
unwindExitBranches = append(unwindExitBranches, p)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
2018-05-20 00:56:36 +02:00
|
|
|
case obj.ARET, ARETUNWIND:
|
2018-03-29 00:55:53 +02:00
|
|
|
ret := *p
|
|
|
|
|
p.As = obj.ANOP
|
|
|
|
|
|
|
|
|
|
if framesize > 0 {
|
|
|
|
|
// SP += framesize
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(framesize))
|
|
|
|
|
p = appendp(p, AI32Add)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
// TODO(neelance): This should theoretically set Spadj, but it only works without.
|
|
|
|
|
// p.Spadj = int32(-framesize)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ret.To.Type == obj.TYPE_MEM {
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
// Set PC_B parameter to function entry.
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
|
|
|
|
|
// low-level WebAssembly call to function
|
|
|
|
|
p = appendp(p, ACall, ret.To)
|
|
|
|
|
p = appendp(p, AReturn)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// SP += 8
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_SP))
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(8))
|
|
|
|
|
p = appendp(p, AI32Add)
|
|
|
|
|
p = appendp(p, ASet, regAddr(REG_SP))
|
|
|
|
|
|
2018-05-20 00:56:36 +02:00
|
|
|
if ret.As == ARETUNWIND {
|
|
|
|
|
// function needs to unwind the WebAssembly stack, return 1
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(1))
|
|
|
|
|
p = appendp(p, AReturn)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// not unwinding the WebAssembly stack, return 0
|
2018-03-29 00:55:53 +02:00
|
|
|
p = appendp(p, AI32Const, constAddr(0))
|
|
|
|
|
p = appendp(p, AReturn)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-19 00:30:12 -04:00
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2018-03-29 00:55:53 +02:00
|
|
|
switch p.From.Name {
|
|
|
|
|
case obj.NAME_AUTO:
|
2022-10-05 03:15:24 +00:00
|
|
|
p.From.Offset += framesize
|
2018-03-29 00:55:53 +02:00
|
|
|
case obj.NAME_PARAM:
|
|
|
|
|
p.From.Reg = REG_SP
|
2022-10-05 03:15:24 +00:00
|
|
|
p.From.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch p.To.Name {
|
|
|
|
|
case obj.NAME_AUTO:
|
2022-10-05 03:15:24 +00:00
|
|
|
p.To.Offset += framesize
|
2018-03-29 00:55:53 +02:00
|
|
|
case obj.NAME_PARAM:
|
|
|
|
|
p.To.Reg = REG_SP
|
2022-10-05 03:15:24 +00:00
|
|
|
p.To.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch p.As {
|
|
|
|
|
case AGet:
|
|
|
|
|
if p.From.Type == obj.TYPE_ADDR {
|
|
|
|
|
get := *p
|
|
|
|
|
p.As = obj.ANOP
|
|
|
|
|
|
|
|
|
|
switch get.From.Name {
|
|
|
|
|
case obj.NAME_EXTERN:
|
|
|
|
|
p = appendp(p, AI64Const, get.From)
|
|
|
|
|
case obj.NAME_AUTO, obj.NAME_PARAM:
|
|
|
|
|
p = appendp(p, AGet, regAddr(get.From.Reg))
|
|
|
|
|
if get.From.Reg == REG_SP {
|
2018-12-12 13:04:44 +01:00
|
|
|
p = appendp(p, AI64ExtendI32U)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
if get.From.Offset != 0 {
|
|
|
|
|
p = appendp(p, AI64Const, constAddr(get.From.Offset))
|
|
|
|
|
p = appendp(p, AI64Add)
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
panic("bad Get: invalid name")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
|
|
|
|
|
if p.From.Type == obj.TYPE_MEM {
|
|
|
|
|
as := p.As
|
|
|
|
|
from := p.From
|
|
|
|
|
|
|
|
|
|
p.As = AGet
|
|
|
|
|
p.From = regAddr(from.Reg)
|
|
|
|
|
|
|
|
|
|
if from.Reg != REG_SP {
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p = appendp(p, as, constAddr(from.Offset))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case AMOVB, AMOVH, AMOVW, AMOVD:
|
|
|
|
|
mov := *p
|
|
|
|
|
p.As = obj.ANOP
|
|
|
|
|
|
|
|
|
|
var loadAs obj.As
|
|
|
|
|
var storeAs obj.As
|
|
|
|
|
switch mov.As {
|
|
|
|
|
case AMOVB:
|
|
|
|
|
loadAs = AI64Load8U
|
|
|
|
|
storeAs = AI64Store8
|
|
|
|
|
case AMOVH:
|
|
|
|
|
loadAs = AI64Load16U
|
|
|
|
|
storeAs = AI64Store16
|
|
|
|
|
case AMOVW:
|
|
|
|
|
loadAs = AI64Load32U
|
|
|
|
|
storeAs = AI64Store32
|
|
|
|
|
case AMOVD:
|
|
|
|
|
loadAs = AI64Load
|
|
|
|
|
storeAs = AI64Store
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
appendValue := func() {
|
|
|
|
|
switch mov.From.Type {
|
|
|
|
|
case obj.TYPE_CONST:
|
|
|
|
|
p = appendp(p, AI64Const, constAddr(mov.From.Offset))
|
|
|
|
|
|
|
|
|
|
case obj.TYPE_ADDR:
|
|
|
|
|
switch mov.From.Name {
|
|
|
|
|
case obj.NAME_NONE, obj.NAME_PARAM, obj.NAME_AUTO:
|
|
|
|
|
p = appendp(p, AGet, regAddr(mov.From.Reg))
|
|
|
|
|
if mov.From.Reg == REG_SP {
|
2018-12-12 13:04:44 +01:00
|
|
|
p = appendp(p, AI64ExtendI32U)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
p = appendp(p, AI64Const, constAddr(mov.From.Offset))
|
|
|
|
|
p = appendp(p, AI64Add)
|
|
|
|
|
case obj.NAME_EXTERN:
|
|
|
|
|
p = appendp(p, AI64Const, mov.From)
|
|
|
|
|
default:
|
|
|
|
|
panic("bad name for MOV")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case obj.TYPE_REG:
|
|
|
|
|
p = appendp(p, AGet, mov.From)
|
|
|
|
|
if mov.From.Reg == REG_SP {
|
2018-12-12 13:04:44 +01:00
|
|
|
p = appendp(p, AI64ExtendI32U)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case obj.TYPE_MEM:
|
|
|
|
|
p = appendp(p, AGet, regAddr(mov.From.Reg))
|
|
|
|
|
if mov.From.Reg != REG_SP {
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, loadAs, constAddr(mov.From.Offset))
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
panic("bad MOV type")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch mov.To.Type {
|
|
|
|
|
case obj.TYPE_REG:
|
|
|
|
|
appendValue()
|
|
|
|
|
if mov.To.Reg == REG_SP {
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, ASet, mov.To)
|
|
|
|
|
|
|
|
|
|
case obj.TYPE_MEM:
|
|
|
|
|
switch mov.To.Name {
|
|
|
|
|
case obj.NAME_NONE, obj.NAME_PARAM:
|
|
|
|
|
p = appendp(p, AGet, regAddr(mov.To.Reg))
|
|
|
|
|
if mov.To.Reg != REG_SP {
|
|
|
|
|
p = appendp(p, AI32WrapI64)
|
|
|
|
|
}
|
|
|
|
|
case obj.NAME_EXTERN:
|
|
|
|
|
p = appendp(p, AI32Const, obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: mov.To.Sym})
|
|
|
|
|
default:
|
|
|
|
|
panic("bad MOV name")
|
|
|
|
|
}
|
|
|
|
|
appendValue()
|
|
|
|
|
p = appendp(p, storeAs, constAddr(mov.To.Offset))
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
panic("bad MOV type")
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-03-17 13:45:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
{
|
2020-07-19 00:30:12 -04:00
|
|
|
p := s.Func().Text
|
2019-03-17 13:45:46 +01:00
|
|
|
if len(unwindExitBranches) > 0 {
|
|
|
|
|
p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack
|
|
|
|
|
for _, b := range unwindExitBranches {
|
|
|
|
|
b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(entryPointLoopBranches) > 0 {
|
|
|
|
|
p = appendp(p, ALoop) // entryPointLoop, used to jump between basic blocks
|
|
|
|
|
for _, b := range entryPointLoopBranches {
|
|
|
|
|
b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if numResumePoints > 0 {
|
|
|
|
|
// Add Block instructions for resume points and BrTable to jump to selected resume point.
|
|
|
|
|
for i := 0; i < numResumePoints+1; i++ {
|
|
|
|
|
p = appendp(p, ABlock)
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, AGet, regAddr(REG_PC_B)) // read next basic block from PC_B
|
|
|
|
|
p = appendp(p, ABrTable, obj.Addr{Val: tableIdxs})
|
|
|
|
|
p = appendp(p, AEnd) // end of Block
|
|
|
|
|
}
|
|
|
|
|
for p.Link != nil {
|
|
|
|
|
p = p.Link // function instructions
|
|
|
|
|
}
|
|
|
|
|
if len(entryPointLoopBranches) > 0 {
|
|
|
|
|
p = appendp(p, AEnd) // end of entryPointLoop
|
|
|
|
|
}
|
|
|
|
|
p = appendp(p, obj.AUNDEF)
|
|
|
|
|
if len(unwindExitBranches) > 0 {
|
|
|
|
|
p = appendp(p, AEnd) // end of unwindExit
|
|
|
|
|
p = appendp(p, AI32Const, constAddr(1))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
currentDepth = 0
|
|
|
|
|
blockDepths := make(map[*obj.Prog]int)
|
2020-07-19 00:30:12 -04:00
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2019-03-17 13:45:46 +01:00
|
|
|
switch p.As {
|
|
|
|
|
case ABlock, ALoop, AIf:
|
|
|
|
|
currentDepth++
|
|
|
|
|
blockDepths[p] = currentDepth
|
|
|
|
|
case AEnd:
|
|
|
|
|
currentDepth--
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
|
2019-03-17 13:45:46 +01:00
|
|
|
switch p.As {
|
|
|
|
|
case ABr, ABrIf:
|
|
|
|
|
if p.To.Type == obj.TYPE_BRANCH {
|
|
|
|
|
blockDepth, ok := blockDepths[p.To.Val.(*obj.Prog)]
|
|
|
|
|
if !ok {
|
|
|
|
|
panic("label not at block")
|
|
|
|
|
}
|
|
|
|
|
p.To = constAddr(int64(currentDepth - blockDepth))
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func constAddr(value int64) obj.Addr {
|
|
|
|
|
return obj.Addr{Type: obj.TYPE_CONST, Offset: value}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func regAddr(reg int16) obj.Addr {
|
|
|
|
|
return obj.Addr{Type: obj.TYPE_REG, Reg: reg}
|
|
|
|
|
}
|
|
|
|
|
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
// Most Go functions take a single parameter (PC_B) in the Wasm ABI.
// notUsePC_B is the set of exceptions, keyed by symbol name; every
// listed name maps to true.
var notUsePC_B = func() map[string]bool {
	names := []string{
		"_rt0_wasm_js",
		"_rt0_wasm_wasip1",
		"wasm_export_run",
		"wasm_export_resume",
		"wasm_export_getsp",
		"wasm_pc_f_loop",
		"gcWriteBarrier",
		"runtime.gcWriteBarrier1",
		"runtime.gcWriteBarrier2",
		"runtime.gcWriteBarrier3",
		"runtime.gcWriteBarrier4",
		"runtime.gcWriteBarrier5",
		"runtime.gcWriteBarrier6",
		"runtime.gcWriteBarrier7",
		"runtime.gcWriteBarrier8",
		"runtime.wasmDiv",
		"runtime.wasmTruncS",
		"runtime.wasmTruncU",
		"cmpbody",
		"memeqbody",
		"memcmp",
		"memchr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
2019-08-21 21:57:59 +02:00
|
|
|
type regVar struct {
|
|
|
|
|
global bool
|
|
|
|
|
index uint64
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type varDecl struct {
|
|
|
|
|
count uint64
|
|
|
|
|
typ valueType
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
|
2019-04-27 01:16:10 +02:00
|
|
|
hasLocalSP := false
|
2019-08-21 21:57:59 +02:00
|
|
|
regVars := [MAXREG - MINREG]*regVar{
|
|
|
|
|
REG_SP - MINREG: {true, 0},
|
|
|
|
|
REG_CTXT - MINREG: {true, 1},
|
|
|
|
|
REG_g - MINREG: {true, 2},
|
|
|
|
|
REG_RET0 - MINREG: {true, 3},
|
|
|
|
|
REG_RET1 - MINREG: {true, 4},
|
|
|
|
|
REG_RET2 - MINREG: {true, 5},
|
|
|
|
|
REG_RET3 - MINREG: {true, 6},
|
|
|
|
|
REG_PAUSE - MINREG: {true, 7},
|
|
|
|
|
}
|
|
|
|
|
var varDecls []*varDecl
|
|
|
|
|
useAssemblyRegMap := func() {
|
|
|
|
|
for i := int16(0); i < 16; i++ {
|
|
|
|
|
regVars[REG_R0+i-MINREG] = &regVar{false, uint64(i)}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-10-19 19:04:29 +00:00
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
// Function starts with declaration of locals: numbers and types.
|
2019-04-27 01:16:10 +02:00
|
|
|
// Some functions use a special calling convention.
|
2018-03-29 00:55:53 +02:00
|
|
|
switch s.Name {
|
2023-03-24 23:07:58 -07:00
|
|
|
case "_rt0_wasm_js", "_rt0_wasm_wasip1", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp",
|
|
|
|
|
"wasm_pc_f_loop", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
|
2019-08-21 21:57:59 +02:00
|
|
|
varDecls = []*varDecl{}
|
|
|
|
|
useAssemblyRegMap()
|
2019-04-27 01:16:10 +02:00
|
|
|
case "memchr", "memcmp":
|
2019-08-21 21:57:59 +02:00
|
|
|
varDecls = []*varDecl{{count: 2, typ: i32}}
|
|
|
|
|
useAssemblyRegMap()
|
2019-04-27 01:16:10 +02:00
|
|
|
case "cmpbody":
|
2019-08-21 21:57:59 +02:00
|
|
|
varDecls = []*varDecl{{count: 2, typ: i64}}
|
|
|
|
|
useAssemblyRegMap()
|
2022-11-01 16:46:43 -07:00
|
|
|
case "gcWriteBarrier":
|
|
|
|
|
varDecls = []*varDecl{{count: 5, typ: i64}}
|
|
|
|
|
useAssemblyRegMap()
|
|
|
|
|
case "runtime.gcWriteBarrier1",
|
|
|
|
|
"runtime.gcWriteBarrier2",
|
|
|
|
|
"runtime.gcWriteBarrier3",
|
|
|
|
|
"runtime.gcWriteBarrier4",
|
|
|
|
|
"runtime.gcWriteBarrier5",
|
|
|
|
|
"runtime.gcWriteBarrier6",
|
|
|
|
|
"runtime.gcWriteBarrier7",
|
|
|
|
|
"runtime.gcWriteBarrier8":
|
|
|
|
|
// no locals
|
2019-08-21 21:57:59 +02:00
|
|
|
useAssemblyRegMap()
|
2018-03-29 00:55:53 +02:00
|
|
|
default:
|
2019-08-21 21:57:59 +02:00
|
|
|
// Normal calling convention: PC_B as WebAssembly parameter. First local variable is local SP cache.
|
|
|
|
|
regVars[REG_PC_B-MINREG] = &regVar{false, 0}
|
2019-04-27 01:16:10 +02:00
|
|
|
hasLocalSP = true
|
2019-08-21 21:57:59 +02:00
|
|
|
|
|
|
|
|
var regUsed [MAXREG - MINREG]bool
|
2020-07-19 00:30:12 -04:00
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2019-08-21 21:57:59 +02:00
|
|
|
if p.From.Reg != 0 {
|
|
|
|
|
regUsed[p.From.Reg-MINREG] = true
|
|
|
|
|
}
|
|
|
|
|
if p.To.Reg != 0 {
|
|
|
|
|
regUsed[p.To.Reg-MINREG] = true
|
|
|
|
|
}
|
2018-10-19 19:04:29 +00:00
|
|
|
}
|
|
|
|
|
|
2019-08-21 21:57:59 +02:00
|
|
|
regs := []int16{REG_SP}
|
2019-09-12 21:05:45 +02:00
|
|
|
for reg := int16(REG_R0); reg <= REG_F31; reg++ {
|
2019-08-21 21:57:59 +02:00
|
|
|
if regUsed[reg-MINREG] {
|
|
|
|
|
regs = append(regs, reg)
|
|
|
|
|
}
|
2018-10-19 19:04:29 +00:00
|
|
|
}
|
2019-08-21 21:57:59 +02:00
|
|
|
|
|
|
|
|
var lastDecl *varDecl
|
|
|
|
|
for i, reg := range regs {
|
|
|
|
|
t := regType(reg)
|
|
|
|
|
if lastDecl == nil || lastDecl.typ != t {
|
|
|
|
|
lastDecl = &varDecl{
|
|
|
|
|
count: 0,
|
|
|
|
|
typ: t,
|
|
|
|
|
}
|
|
|
|
|
varDecls = append(varDecls, lastDecl)
|
|
|
|
|
}
|
|
|
|
|
lastDecl.count++
|
|
|
|
|
if reg != REG_SP {
|
|
|
|
|
regVars[reg-MINREG] = &regVar{false, 1 + uint64(i)}
|
|
|
|
|
}
|
2018-10-19 19:04:29 +00:00
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
2019-08-21 21:57:59 +02:00
|
|
|
w := new(bytes.Buffer)
|
|
|
|
|
|
|
|
|
|
writeUleb128(w, uint64(len(varDecls)))
|
|
|
|
|
for _, decl := range varDecls {
|
|
|
|
|
writeUleb128(w, decl.count)
|
|
|
|
|
w.WriteByte(byte(decl.typ))
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-27 01:16:10 +02:00
|
|
|
if hasLocalSP {
|
|
|
|
|
// Copy SP from its global variable into a local variable. Accessing a local variable is more efficient.
|
|
|
|
|
updateLocalSP(w)
|
|
|
|
|
}
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo store
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less extensively than SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
|
2020-07-19 00:30:12 -04:00
|
|
|
for p := s.Func().Text; p != nil; p = p.Link {
|
2018-03-29 00:55:53 +02:00
|
|
|
switch p.As {
|
|
|
|
|
case AGet:
|
|
|
|
|
if p.From.Type != obj.TYPE_REG {
|
|
|
|
|
panic("bad Get: argument is not a register")
|
|
|
|
|
}
|
|
|
|
|
reg := p.From.Reg
|
2019-08-21 21:57:59 +02:00
|
|
|
v := regVars[reg-MINREG]
|
|
|
|
|
if v == nil {
|
2018-03-29 00:55:53 +02:00
|
|
|
panic("bad Get: invalid register")
|
|
|
|
|
}
|
2019-08-21 21:57:59 +02:00
|
|
|
if reg == REG_SP && hasLocalSP {
|
|
|
|
|
writeOpcode(w, ALocalGet)
|
|
|
|
|
writeUleb128(w, 1) // local SP
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if v.global {
|
|
|
|
|
writeOpcode(w, AGlobalGet)
|
|
|
|
|
} else {
|
|
|
|
|
writeOpcode(w, ALocalGet)
|
|
|
|
|
}
|
|
|
|
|
writeUleb128(w, v.index)
|
2018-03-29 00:55:53 +02:00
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
case ASet:
|
|
|
|
|
if p.To.Type != obj.TYPE_REG {
|
|
|
|
|
panic("bad Set: argument is not a register")
|
|
|
|
|
}
|
|
|
|
|
reg := p.To.Reg
|
2019-08-21 21:57:59 +02:00
|
|
|
v := regVars[reg-MINREG]
|
|
|
|
|
if v == nil {
|
|
|
|
|
panic("bad Set: invalid register")
|
|
|
|
|
}
|
|
|
|
|
if reg == REG_SP && hasLocalSP {
|
|
|
|
|
writeOpcode(w, ALocalTee)
|
|
|
|
|
writeUleb128(w, 1) // local SP
|
|
|
|
|
}
|
|
|
|
|
if v.global {
|
|
|
|
|
writeOpcode(w, AGlobalSet)
|
|
|
|
|
} else {
|
2018-03-29 00:55:53 +02:00
|
|
|
if p.Link.As == AGet && p.Link.From.Reg == reg {
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, ALocalTee)
|
2018-03-29 00:55:53 +02:00
|
|
|
p = p.Link
|
|
|
|
|
} else {
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, ALocalSet)
|
2018-03-29 00:55:53 +02:00
|
|
|
}
|
|
|
|
|
}
|
2019-08-21 21:57:59 +02:00
|
|
|
writeUleb128(w, v.index)
|
2018-03-29 00:55:53 +02:00
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
case ATee:
|
|
|
|
|
if p.To.Type != obj.TYPE_REG {
|
|
|
|
|
panic("bad Tee: argument is not a register")
|
|
|
|
|
}
|
|
|
|
|
reg := p.To.Reg
|
2019-08-21 21:57:59 +02:00
|
|
|
v := regVars[reg-MINREG]
|
|
|
|
|
if v == nil {
|
2018-03-29 00:55:53 +02:00
|
|
|
panic("bad Tee: invalid register")
|
|
|
|
|
}
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, ALocalTee)
|
|
|
|
|
writeUleb128(w, v.index)
|
2018-03-29 00:55:53 +02:00
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
case ANot:
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, AI32Eqz)
|
2018-03-29 00:55:53 +02:00
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
case obj.AUNDEF:
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, AUnreachable)
|
2018-03-29 00:55:53 +02:00
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
case obj.ANOP, obj.ATEXT, obj.AFUNCDATA, obj.APCDATA:
|
|
|
|
|
// ignore
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, p.As)
|
2018-03-29 00:55:53 +02:00
|
|
|
|
|
|
|
|
switch p.As {
|
|
|
|
|
case ABlock, ALoop, AIf:
|
|
|
|
|
if p.From.Offset != 0 {
|
|
|
|
|
// block type, rarely used, e.g. for code compiled with emscripten
|
|
|
|
|
w.WriteByte(0x80 - byte(p.From.Offset))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
w.WriteByte(0x40)
|
|
|
|
|
|
|
|
|
|
case ABr, ABrIf:
|
|
|
|
|
if p.To.Type != obj.TYPE_CONST {
|
|
|
|
|
panic("bad Br/BrIf")
|
|
|
|
|
}
|
|
|
|
|
writeUleb128(w, uint64(p.To.Offset))
|
|
|
|
|
|
|
|
|
|
case ABrTable:
|
|
|
|
|
idxs := p.To.Val.([]uint64)
|
|
|
|
|
writeUleb128(w, uint64(len(idxs)-1))
|
|
|
|
|
for _, idx := range idxs {
|
|
|
|
|
writeUleb128(w, idx)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ACall:
|
|
|
|
|
switch p.To.Type {
|
|
|
|
|
case obj.TYPE_CONST:
|
|
|
|
|
writeUleb128(w, uint64(p.To.Offset))
|
|
|
|
|
|
|
|
|
|
case obj.TYPE_MEM:
|
|
|
|
|
if p.To.Name != obj.NAME_EXTERN && p.To.Name != obj.NAME_STATIC {
|
|
|
|
|
fmt.Println(p.To)
|
|
|
|
|
panic("bad name for Call")
|
|
|
|
|
}
|
|
|
|
|
r := obj.Addrel(s)
|
2020-09-20 23:29:20 -04:00
|
|
|
r.Siz = 1 // actually variable sized
|
2018-03-29 00:55:53 +02:00
|
|
|
r.Off = int32(w.Len())
|
|
|
|
|
r.Type = objabi.R_CALL
|
|
|
|
|
if p.Mark&WasmImport != 0 {
|
|
|
|
|
r.Type = objabi.R_WASMIMPORT
|
|
|
|
|
}
|
|
|
|
|
r.Sym = p.To.Sym
|
2019-04-27 01:16:10 +02:00
|
|
|
if hasLocalSP {
|
|
|
|
|
// The stack may have moved, which changes SP. Update the local SP variable.
|
|
|
|
|
updateLocalSP(w)
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
panic("bad type for Call")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ACallIndirect:
|
|
|
|
|
writeUleb128(w, uint64(p.To.Offset))
|
|
|
|
|
w.WriteByte(0x00) // reserved value
|
2019-04-27 01:16:10 +02:00
|
|
|
if hasLocalSP {
|
|
|
|
|
// The stack may have moved, which changes SP. Update the local SP variable.
|
|
|
|
|
updateLocalSP(w)
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
|
|
|
|
|
case AI32Const, AI64Const:
|
|
|
|
|
if p.From.Name == obj.NAME_EXTERN {
|
|
|
|
|
r := obj.Addrel(s)
|
2020-09-20 23:29:20 -04:00
|
|
|
r.Siz = 1 // actually variable sized
|
2018-03-29 00:55:53 +02:00
|
|
|
r.Off = int32(w.Len())
|
|
|
|
|
r.Type = objabi.R_ADDR
|
|
|
|
|
r.Sym = p.From.Sym
|
|
|
|
|
r.Add = p.From.Offset
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
writeSleb128(w, p.From.Offset)
|
|
|
|
|
|
2019-09-12 21:05:45 +02:00
|
|
|
case AF32Const:
|
|
|
|
|
b := make([]byte, 4)
|
|
|
|
|
binary.LittleEndian.PutUint32(b, math.Float32bits(float32(p.From.Val.(float64))))
|
|
|
|
|
w.Write(b)
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
case AF64Const:
|
|
|
|
|
b := make([]byte, 8)
|
|
|
|
|
binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64)))
|
|
|
|
|
w.Write(b)
|
|
|
|
|
|
|
|
|
|
case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
|
|
|
|
|
if p.From.Offset < 0 {
|
|
|
|
|
panic("negative offset for *Load")
|
|
|
|
|
}
|
|
|
|
|
if p.From.Type != obj.TYPE_CONST {
|
|
|
|
|
panic("bad type for *Load")
|
|
|
|
|
}
|
2018-05-07 16:18:19 +02:00
|
|
|
if p.From.Offset > math.MaxUint32 {
|
|
|
|
|
ctxt.Diag("bad offset in %v", p)
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
writeUleb128(w, align(p.As))
|
|
|
|
|
writeUleb128(w, uint64(p.From.Offset))
|
|
|
|
|
|
|
|
|
|
case AI32Store, AI64Store, AF32Store, AF64Store, AI32Store8, AI32Store16, AI64Store8, AI64Store16, AI64Store32:
|
|
|
|
|
if p.To.Offset < 0 {
|
|
|
|
|
panic("negative offset")
|
|
|
|
|
}
|
2018-05-07 16:18:19 +02:00
|
|
|
if p.From.Offset > math.MaxUint32 {
|
|
|
|
|
ctxt.Diag("bad offset in %v", p)
|
|
|
|
|
}
|
2018-03-29 00:55:53 +02:00
|
|
|
writeUleb128(w, align(p.As))
|
|
|
|
|
writeUleb128(w, uint64(p.To.Offset))
|
|
|
|
|
|
2022-10-21 22:22:12 -05:00
|
|
|
case ACurrentMemory, AGrowMemory, AMemoryFill:
|
|
|
|
|
w.WriteByte(0x00)
|
|
|
|
|
|
|
|
|
|
case AMemoryCopy:
|
|
|
|
|
w.WriteByte(0x00)
|
2018-03-29 00:55:53 +02:00
|
|
|
w.WriteByte(0x00)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
w.WriteByte(0x0b) // end
|
|
|
|
|
|
|
|
|
|
s.P = w.Bytes()
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-27 01:16:10 +02:00
|
|
|
func updateLocalSP(w *bytes.Buffer) {
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, AGlobalGet)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo stores
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less entensively as SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
writeUleb128(w, 0) // global SP
|
2019-08-21 21:57:59 +02:00
|
|
|
writeOpcode(w, ALocalSet)
|
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on
Wasm.
PC_B is used mostly for intra-function jumps, and for a function
telling its callee where to start or resume. This usage can be
served by a parameter. The top level loop (wasm_pc_f_loop) uses
PC_B for resuming a function. This value is either set by gogo,
or loaded from the Go stack at function return. Instead of
loading PC_B at each function return, we could make gogo stores
PC_B at the same stack location, and let the top level loop do
the load. This way, we don't need to use global PC_B to
communicate with the top level loop, and we can replace global
PC_B with a parameter.
PC_F is similar. It is even more so in that the only reader is
the top level loop. Let the top level loop read it from the stack,
and we can get rid of PC_F entirely.
PC_F and PC_B are used less entensively as SP, so this CL has
smaller performance gain.
Running on Chrome 74.0.3729.108 on Linux/AMD64,
name old time/op new time/op delta
BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5)
Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5)
FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5)
FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5)
FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4)
FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5)
FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5)
FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5)
FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5)
GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5)
GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4)
Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5)
Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5)
JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5)
JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5)
Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5)
GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4)
RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5)
RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5)
RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5)
RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5)
RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5)
Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5)
Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5)
TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5)
TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5)
[Geo mean] 232µs 221µs -4.54%
name old speed new speed delta
GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5)
GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4)
Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5)
Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5)
JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5)
JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5)
GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5)
RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5)
RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5)
RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5)
RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4)
RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5)
RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5)
RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5)
Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5)
Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5)
[Geo mean] 24.4MB/s 25.4MB/s +4.18%
Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49
Reviewed-on: https://go-review.googlesource.com/c/go/+/173980
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-04-25 17:25:54 -04:00
|
|
|
writeUleb128(w, 1) // local SP
|
2019-04-27 01:16:10 +02:00
|
|
|
}
|
|
|
|
|
|
2019-08-21 21:57:59 +02:00
|
|
|
func writeOpcode(w *bytes.Buffer, as obj.As) {
|
|
|
|
|
switch {
|
|
|
|
|
case as < AUnreachable:
|
|
|
|
|
panic(fmt.Sprintf("unexpected assembler op: %s", as))
|
|
|
|
|
case as < AEnd:
|
|
|
|
|
w.WriteByte(byte(as - AUnreachable + 0x00))
|
|
|
|
|
case as < ADrop:
|
|
|
|
|
w.WriteByte(byte(as - AEnd + 0x0B))
|
|
|
|
|
case as < ALocalGet:
|
|
|
|
|
w.WriteByte(byte(as - ADrop + 0x1A))
|
|
|
|
|
case as < AI32Load:
|
|
|
|
|
w.WriteByte(byte(as - ALocalGet + 0x20))
|
|
|
|
|
case as < AI32TruncSatF32S:
|
|
|
|
|
w.WriteByte(byte(as - AI32Load + 0x28))
|
|
|
|
|
case as < ALast:
|
|
|
|
|
w.WriteByte(0xFC)
|
|
|
|
|
w.WriteByte(byte(as - AI32TruncSatF32S + 0x00))
|
|
|
|
|
default:
|
|
|
|
|
panic(fmt.Sprintf("unexpected assembler op: %s", as))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// valueType is the single-byte code that identifies the type of a value.
// It is written verbatim as the type byte of each local variable
// declaration.
type valueType byte

// Type codes for the four numeric value types.
const (
	i32 valueType = 0x7F
	i64 valueType = 0x7E
	f32 valueType = 0x7D
	f64 valueType = 0x7C
)
|
|
|
|
|
|
|
|
|
|
func regType(reg int16) valueType {
|
|
|
|
|
switch {
|
|
|
|
|
case reg == REG_SP:
|
|
|
|
|
return i32
|
|
|
|
|
case reg >= REG_R0 && reg <= REG_R15:
|
|
|
|
|
return i64
|
|
|
|
|
case reg >= REG_F0 && reg <= REG_F15:
|
2019-09-12 21:05:45 +02:00
|
|
|
return f32
|
|
|
|
|
case reg >= REG_F16 && reg <= REG_F31:
|
2019-08-21 21:57:59 +02:00
|
|
|
return f64
|
|
|
|
|
default:
|
|
|
|
|
panic("invalid register")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-29 00:55:53 +02:00
|
|
|
func align(as obj.As) uint64 {
|
|
|
|
|
switch as {
|
|
|
|
|
case AI32Load8S, AI32Load8U, AI64Load8S, AI64Load8U, AI32Store8, AI64Store8:
|
|
|
|
|
return 0
|
|
|
|
|
case AI32Load16S, AI32Load16U, AI64Load16S, AI64Load16U, AI32Store16, AI64Store16:
|
|
|
|
|
return 1
|
|
|
|
|
case AI32Load, AF32Load, AI64Load32S, AI64Load32U, AI32Store, AF32Store, AI64Store32:
|
|
|
|
|
return 2
|
|
|
|
|
case AI64Load, AF64Load, AI64Store, AF64Store:
|
|
|
|
|
return 3
|
|
|
|
|
default:
|
|
|
|
|
panic("align: bad op")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// writeUleb128 writes v to w in unsigned LEB128 form: seven payload bits per
// byte, least significant group first, with the high bit (0x80) set on every
// byte except the last. Values below 128 therefore occupy a single byte.
//
// Errors returned by w.WriteByte are ignored.
func writeUleb128(w io.ByteWriter, v uint64) {
	// Emit continuation bytes while more than seven bits remain.
	for v >= 0x80 {
		w.WriteByte(uint8(v&0x7f) | 0x80)
		v >>= 7
	}
	// Final byte: high bit clear.
	w.WriteByte(uint8(v))
}
|
|
|
|
|
|
|
|
|
|
// writeSleb128 writes v to w in signed LEB128 form: seven payload bits per
// byte, least significant group first, with the high bit (0x80) set on every
// byte except the last.
//
// Encoding stops once the remaining value is pure sign extension of the
// byte just produced: either the rest is 0 and bit 6 of the byte is clear,
// or the rest is -1 and bit 6 is set.
//
// Errors returned by w.WriteByte are ignored.
func writeSleb128(w io.ByteWriter, v int64) {
	for {
		c := uint8(v & 0x7f)
		signBit := c&0x40 != 0
		v >>= 7 // arithmetic shift keeps the sign
		if (v == 0 && !signBit) || (v == -1 && signBit) {
			w.WriteByte(c)
			return
		}
		w.WriteByte(c | 0x80)
	}
}
|