mirror of
https://github.com/golang/go.git
synced 2025-10-19 11:03:18 +00:00
cmd/compile: add floating point min/max intrinsics on s390x
Add the VECTOR FP (MINIMUM|MAXIMUM) instructions to the assembler and use them in the compiler to implement min and max. Note: I've allowed floating point registers to be used with the single element instructions (those with the W instead of V prefix) to allow easier integration into the compiler. Change-Id: I5f80a510bd248cf483cce95f1979bf63fbae7de6 Reviewed-on: https://go-review.googlesource.com/c/go/+/684715 Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Mark Freeman <mark@golang.org> Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
82a1921c3b
commit
cedf63616a
12 changed files with 160 additions and 2 deletions
12
src/cmd/asm/internal/asm/testdata/s390x.s
vendored
12
src/cmd/asm/internal/asm/testdata/s390x.s
vendored
|
@ -540,6 +540,18 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
|
|||
VSTRCZBS V18, V20, V22, V24 // e78240306f8a
|
||||
VSTRCZHS V18, V20, V22, V24 // e78241306f8a
|
||||
VSTRCZFS V18, V20, V22, V24 // e78242306f8a
|
||||
VFMAXSB $1, V2, V3, V4 // e742301020ef
|
||||
WFMAXSB $2, V5, V6, V7 // e775602820ef
|
||||
WFMAXSB $2, F5, F6, F7 // e775602820ef
|
||||
VFMAXDB $3, V8, V9, V10 // e7a8903030ef
|
||||
WFMAXDB $4, V11, V12, V13 // e7dbc04830ef
|
||||
WFMAXDB $4, F11, F12, F13 // e7dbc04830ef
|
||||
VFMINSB $7, V14, V15, V16 // e70ef07028ee
|
||||
WFMINSB $8, V17, V18, V19 // e73120882eee
|
||||
WFMINSB $8, F1, F2, F3 // e731208820ee
|
||||
VFMINDB $9, V20, V21, V22 // e76450903eee
|
||||
WFMINDB $10, V23, V24, V25 // e79780a83eee
|
||||
WFMINDB $10, F7, F8, F9 // e79780a830ee
|
||||
|
||||
RET
|
||||
RET foo(SB)
|
||||
|
|
|
@ -281,6 +281,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
case ssa.OpS390XCPSDR:
|
||||
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||
p.Reg = v.Args[0].Reg()
|
||||
case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
|
||||
ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
|
||||
p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* M6 function code 1: Java Math.Max() / Math.Min() semantics */)
|
||||
p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
|
||||
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
|
||||
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
|
||||
ssa.OpS390XMODD, ssa.OpS390XMODW,
|
||||
|
|
|
@ -145,6 +145,9 @@
|
|||
|
||||
(Sqrt32 ...) => (FSQRTS ...)
|
||||
|
||||
(Max(64|32)F ...) => (WFMAX(D|S)B ...)
|
||||
(Min(64|32)F ...) => (WFMIN(D|S)B ...)
|
||||
|
||||
// Atomic loads and stores.
|
||||
// The SYNC instruction (fast-BCR-serialization) prevents store-load
|
||||
// reordering. Other sequences of memory operations (load-load,
|
||||
|
|
|
@ -222,6 +222,12 @@ func init() {
|
|||
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit
|
||||
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
|
||||
|
||||
// Single element vector floating point min / max instructions
|
||||
{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
|
||||
{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
|
||||
{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
|
||||
{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
|
||||
|
||||
// Round to integer, float64 only.
|
||||
//
|
||||
// aux | rounding mode
|
||||
|
|
|
@ -2655,6 +2655,10 @@ const (
|
|||
OpS390XLPDFR
|
||||
OpS390XLNDFR
|
||||
OpS390XCPSDR
|
||||
OpS390XWFMAXDB
|
||||
OpS390XWFMAXSB
|
||||
OpS390XWFMINDB
|
||||
OpS390XWFMINSB
|
||||
OpS390XFIDBR
|
||||
OpS390XFMOVSload
|
||||
OpS390XFMOVDload
|
||||
|
@ -35775,6 +35779,62 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "WFMAXDB",
|
||||
argLen: 2,
|
||||
asm: s390x.AWFMAXDB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "WFMAXSB",
|
||||
argLen: 2,
|
||||
asm: s390x.AWFMAXSB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "WFMINDB",
|
||||
argLen: 2,
|
||||
asm: s390x.AWFMINDB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "WFMINSB",
|
||||
argLen: 2,
|
||||
asm: s390x.AWFMINSB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FIDBR",
|
||||
auxType: auxInt8,
|
||||
|
|
|
@ -368,6 +368,18 @@ func rewriteValueS390X(v *Value) bool {
|
|||
return rewriteValueS390X_OpLsh8x64(v)
|
||||
case OpLsh8x8:
|
||||
return rewriteValueS390X_OpLsh8x8(v)
|
||||
case OpMax32F:
|
||||
v.Op = OpS390XWFMAXSB
|
||||
return true
|
||||
case OpMax64F:
|
||||
v.Op = OpS390XWFMAXDB
|
||||
return true
|
||||
case OpMin32F:
|
||||
v.Op = OpS390XWFMINSB
|
||||
return true
|
||||
case OpMin64F:
|
||||
v.Op = OpS390XWFMINDB
|
||||
return true
|
||||
case OpMod16:
|
||||
return rewriteValueS390X_OpMod16(v)
|
||||
case OpMod16u:
|
||||
|
|
|
@ -3986,7 +3986,7 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
|
|||
if typ.IsFloat() {
|
||||
hasIntrinsic := false
|
||||
switch Arch.LinkArch.Family {
|
||||
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64:
|
||||
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64, sys.S390X:
|
||||
hasIntrinsic = true
|
||||
case sys.PPC64:
|
||||
hasIntrinsic = buildcfg.GOPPC64 >= 9
|
||||
|
|
|
@ -715,6 +715,14 @@ const (
|
|||
AWFLNDB
|
||||
AVFLPDB
|
||||
AWFLPDB
|
||||
AVFMAXDB
|
||||
AWFMAXDB
|
||||
AVFMAXSB
|
||||
AWFMAXSB
|
||||
AVFMINDB
|
||||
AWFMINDB
|
||||
AVFMINSB
|
||||
AWFMINSB
|
||||
AVFSQ
|
||||
AVFSQDB
|
||||
AWFSQDB
|
||||
|
|
|
@ -438,6 +438,14 @@ var Anames = []string{
|
|||
"WFLNDB",
|
||||
"VFLPDB",
|
||||
"WFLPDB",
|
||||
"VFMAXDB",
|
||||
"WFMAXDB",
|
||||
"VFMAXSB",
|
||||
"WFMAXSB",
|
||||
"VFMINDB",
|
||||
"WFMINDB",
|
||||
"VFMINSB",
|
||||
"WFMINSB",
|
||||
"VFSQ",
|
||||
"VFSQDB",
|
||||
"WFSQDB",
|
||||
|
|
|
@ -441,6 +441,11 @@ var optab = []Optab{
|
|||
{i: 119, as: AVERLLVG, a1: C_VREG, a2: C_VREG, a6: C_VREG},
|
||||
{i: 119, as: AVERLLVG, a1: C_VREG, a6: C_VREG},
|
||||
|
||||
// VRR-c floating point min/max
|
||||
{i: 128, as: AVFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
|
||||
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
|
||||
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_FREG, a3: C_FREG, a6: C_FREG},
|
||||
|
||||
// VRR-d
|
||||
{i: 120, as: AVACQ, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG},
|
||||
|
||||
|
@ -1480,6 +1485,14 @@ func buildop(ctxt *obj.Link) {
|
|||
opset(AVFMSDB, r)
|
||||
opset(AWFMSDB, r)
|
||||
opset(AVPERM, r)
|
||||
case AVFMAXDB:
|
||||
opset(AVFMAXSB, r)
|
||||
opset(AVFMINDB, r)
|
||||
opset(AVFMINSB, r)
|
||||
case AWFMAXDB:
|
||||
opset(AWFMAXSB, r)
|
||||
opset(AWFMINDB, r)
|
||||
opset(AWFMINSB, r)
|
||||
case AKM:
|
||||
opset(AKMC, r)
|
||||
opset(AKLMD, r)
|
||||
|
@ -2636,6 +2649,8 @@ const (
|
|||
op_VUPLL uint32 = 0xE7D4 // VRR-a VECTOR UNPACK LOGICAL LOW
|
||||
op_VUPL uint32 = 0xE7D6 // VRR-a VECTOR UNPACK LOW
|
||||
op_VMSL uint32 = 0xE7B8 // VRR-d VECTOR MULTIPLY SUM LOGICAL
|
||||
op_VFMAX uint32 = 0xE7EF // VRR-c VECTOR FP MAXIMUM
|
||||
op_VFMIN uint32 = 0xE7EE // VRR-c VECTOR FP MINIMUM
|
||||
|
||||
// added in z15
|
||||
op_KDSA uint32 = 0xB93A // FORMAT_RRE COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA)
|
||||
|
@ -4475,6 +4490,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
|
|||
c.ctxt.Diag("padding byte register cannot be same as input or output register %v", p)
|
||||
}
|
||||
zRS(op_MVCLE, uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), uint32(d2), asm)
|
||||
|
||||
case 128: // VRR-c floating point max/min
|
||||
op, m4, _ := vop(p.As)
|
||||
m5 := singleElementMask(p.As)
|
||||
m6 := uint32(c.vregoff(&p.From))
|
||||
zVRRc(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), m6, m5, m4, asm)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1027,6 +1027,22 @@ func vop(as obj.As) (opcode, es, cs uint32) {
|
|||
return op_VUPL, 1, 0
|
||||
case AVUPLF:
|
||||
return op_VUPL, 2, 0
|
||||
case AVFMAXDB:
|
||||
return op_VFMAX, 3, 0
|
||||
case AWFMAXDB:
|
||||
return op_VFMAX, 3, 0
|
||||
case AVFMAXSB:
|
||||
return op_VFMAX, 2, 0
|
||||
case AWFMAXSB:
|
||||
return op_VFMAX, 2, 0
|
||||
case AVFMINDB:
|
||||
return op_VFMIN, 3, 0
|
||||
case AWFMINDB:
|
||||
return op_VFMIN, 3, 0
|
||||
case AVFMINSB:
|
||||
return op_VFMIN, 2, 0
|
||||
case AWFMINSB:
|
||||
return op_VFMIN, 2, 0
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1062,7 +1078,11 @@ func singleElementMask(as obj.As) uint32 {
|
|||
AWFSQDB,
|
||||
AWFSDB,
|
||||
AWFTCIDB,
|
||||
AWFIDB:
|
||||
AWFIDB,
|
||||
AWFMAXDB,
|
||||
AWFMAXSB,
|
||||
AWFMINDB,
|
||||
AWFMINSB:
|
||||
return 8
|
||||
}
|
||||
return 0
|
||||
|
|
|
@ -172,6 +172,7 @@ func Float64Min(a, b float64) float64 {
|
|||
// riscv64:"FMIN"
|
||||
// ppc64/power9:"XSMINJDP"
|
||||
// ppc64/power10:"XSMINJDP"
|
||||
// s390x: "WFMINDB"
|
||||
return min(a, b)
|
||||
}
|
||||
|
||||
|
@ -182,6 +183,7 @@ func Float64Max(a, b float64) float64 {
|
|||
// riscv64:"FMAX"
|
||||
// ppc64/power9:"XSMAXJDP"
|
||||
// ppc64/power10:"XSMAXJDP"
|
||||
// s390x: "WFMAXDB"
|
||||
return max(a, b)
|
||||
}
|
||||
|
||||
|
@ -192,6 +194,7 @@ func Float32Min(a, b float32) float32 {
|
|||
// riscv64:"FMINS"
|
||||
// ppc64/power9:"XSMINJDP"
|
||||
// ppc64/power10:"XSMINJDP"
|
||||
// s390x: "WFMINSB"
|
||||
return min(a, b)
|
||||
}
|
||||
|
||||
|
@ -202,6 +205,7 @@ func Float32Max(a, b float32) float32 {
|
|||
// riscv64:"FMAXS"
|
||||
// ppc64/power9:"XSMAXJDP"
|
||||
// ppc64/power10:"XSMAXJDP"
|
||||
// s390x: "WFMAXSB"
|
||||
return max(a, b)
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue