cmd/compile: add floating point min/max intrinsics on s390x

Add the VECTOR FP (MINIMUM|MAXIMUM) instructions to the assembler and
use them in the compiler to implement min and max.

Note: I've allowed floating point registers to be used with the single
element instructions (those with the W instead of V prefix) to allow
easier integration into the compiler.

Change-Id: I5f80a510bd248cf483cce95f1979bf63fbae7de6
Reviewed-on: https://go-review.googlesource.com/c/go/+/684715
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <mark@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Michael Munday 2025-06-27 21:05:38 +01:00
parent 82a1921c3b
commit cedf63616a
12 changed files with 160 additions and 2 deletions

View file

@ -540,6 +540,18 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
VSTRCZBS V18, V20, V22, V24 // e78240306f8a
VSTRCZHS V18, V20, V22, V24 // e78241306f8a
VSTRCZFS V18, V20, V22, V24 // e78242306f8a
VFMAXSB $1, V2, V3, V4 // e742301020ef
WFMAXSB $2, V5, V6, V7 // e775602820ef
WFMAXSB $2, F5, F6, F7 // e775602820ef
VFMAXDB $3, V8, V9, V10 // e7a8903030ef
WFMAXDB $4, V11, V12, V13 // e7dbc04830ef
WFMAXDB $4, F11, F12, F13 // e7dbc04830ef
VFMINSB $7, V14, V15, V16 // e70ef07028ee
WFMINSB $8, V17, V18, V19 // e73120882eee
WFMINSB $8, F1, F2, F3 // e731208820ee
VFMINDB $9, V20, V21, V22 // e76450903eee
WFMINDB $10, V23, V24, V25 // e79780a83eee
WFMINDB $10, F7, F8, F9 // e79780a830ee
RET
RET foo(SB)

View file

@ -281,6 +281,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpS390XCPSDR:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.Reg = v.Args[0].Reg()
case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
// Single element vector floating point min/max. All four ops are
// emitted the same way: the result register, the arg0 register and the
// constant 1 are passed to opregregimm, then the arg1 register is
// appended as an additional source operand.
//
// NOTE(review): the inline comment below names only Java Math.Max(),
// but the same constant is also used for the WFMIN ops; presumably it
// selects the matching Math.Min() behavior there - confirm against the
// instruction definition.
p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* Java Math.Max() */)
p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
ssa.OpS390XMODD, ssa.OpS390XMODW,

View file

@ -145,6 +145,9 @@
(Sqrt32 ...) => (FSQRTS ...)
(Max(64|32)F ...) => (WFMAX(D|S)B ...)
(Min(64|32)F ...) => (WFMIN(D|S)B ...)
// Atomic loads and stores.
// The SYNC instruction (fast-BCR-serialization) prevents store-load
// reordering. Other sequences of memory operations (load-load,

View file

@ -222,6 +222,12 @@ func init() {
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
// Single element vector floating point min / max instructions
{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
// Round to integer, float64 only.
//
// aux | rounding mode

View file

@ -2655,6 +2655,10 @@ const (
OpS390XLPDFR
OpS390XLNDFR
OpS390XCPSDR
OpS390XWFMAXDB
OpS390XWFMAXSB
OpS390XWFMINDB
OpS390XWFMINSB
OpS390XFIDBR
OpS390XFMOVSload
OpS390XFMOVDload
@ -35775,6 +35779,62 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "WFMAXDB",
argLen: 2,
asm: s390x.AWFMAXDB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMAXSB",
argLen: 2,
asm: s390x.AWFMAXSB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMINDB",
argLen: 2,
asm: s390x.AWFMINDB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMINSB",
argLen: 2,
asm: s390x.AWFMINSB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "FIDBR",
auxType: auxInt8,

View file

@ -368,6 +368,18 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueS390X_OpLsh8x8(v)
case OpMax32F:
v.Op = OpS390XWFMAXSB
return true
case OpMax64F:
v.Op = OpS390XWFMAXDB
return true
case OpMin32F:
v.Op = OpS390XWFMINSB
return true
case OpMin64F:
v.Op = OpS390XWFMINDB
return true
case OpMod16:
return rewriteValueS390X_OpMod16(v)
case OpMod16u:

View file

@ -3986,7 +3986,7 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
if typ.IsFloat() {
hasIntrinsic := false
switch Arch.LinkArch.Family {
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64:
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64, sys.S390X:
hasIntrinsic = true
case sys.PPC64:
hasIntrinsic = buildcfg.GOPPC64 >= 9

View file

@ -715,6 +715,14 @@ const (
AWFLNDB
AVFLPDB
AWFLPDB
AVFMAXDB
AWFMAXDB
AVFMAXSB
AWFMAXSB
AVFMINDB
AWFMINDB
AVFMINSB
AWFMINSB
AVFSQ
AVFSQDB
AWFSQDB

View file

@ -438,6 +438,14 @@ var Anames = []string{
"WFLNDB",
"VFLPDB",
"WFLPDB",
"VFMAXDB",
"WFMAXDB",
"VFMAXSB",
"WFMAXSB",
"VFMINDB",
"WFMINDB",
"VFMINSB",
"WFMINSB",
"VFSQ",
"VFSQDB",
"WFSQDB",

View file

@ -441,6 +441,11 @@ var optab = []Optab{
{i: 119, as: AVERLLVG, a1: C_VREG, a2: C_VREG, a6: C_VREG},
{i: 119, as: AVERLLVG, a1: C_VREG, a6: C_VREG},
// VRR-c floating point min/max
// The first operand is a small constant, followed by three registers.
// Only the W (single element) op also has an entry taking floating point
// registers; the V (full vector) op accepts vector registers only.
// The remaining min/max mnemonics are attached to these two ops in buildop.
{i: 128, as: AVFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_FREG, a3: C_FREG, a6: C_FREG},
// VRR-d
{i: 120, as: AVACQ, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG},
@ -1480,6 +1485,14 @@ func buildop(ctxt *obj.Link) {
opset(AVFMSDB, r)
opset(AWFMSDB, r)
opset(AVPERM, r)
// Floating point min/max: optab lists only the double precision max ops
// (AVFMAXDB and AWFMAXDB). The single precision max and both min variants
// are registered against them here, presumably so they accept the same
// operand forms.
case AVFMAXDB:
// Full vector variants.
opset(AVFMAXSB, r)
opset(AVFMINDB, r)
opset(AVFMINSB, r)
case AWFMAXDB:
// Single element (W prefix) variants.
opset(AWFMAXSB, r)
opset(AWFMINDB, r)
opset(AWFMINSB, r)
case AKM:
opset(AKMC, r)
opset(AKLMD, r)
@ -2636,6 +2649,8 @@ const (
op_VUPLL uint32 = 0xE7D4 // VRR-a VECTOR UNPACK LOGICAL LOW
op_VUPL uint32 = 0xE7D6 // VRR-a VECTOR UNPACK LOW
op_VMSL uint32 = 0xE7B8 // VRR-d VECTOR MULTIPLY SUM LOGICAL
op_VFMAX uint32 = 0xE7EF // VRR-c VECTOR FP MAXIMUM
op_VFMIN uint32 = 0xE7EE // VRR-c VECTOR FP MINIMUM
// added in z15
op_KDSA uint32 = 0xB93A // FORMAT_RRE COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA)
@ -4475,6 +4490,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
c.ctxt.Diag("padding byte register cannot be same as input or output register %v", p)
}
zRS(op_MVCLE, uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), uint32(d2), asm)
case 128: // VRR-c floating point max/min
// vop supplies the opcode and the element size for this mnemonic; its
// third result is not needed here.
op, m4, _ := vop(p.As)
// 8 for the single element (W prefix) ops, 0 for the full vector ops.
m5 := singleElementMask(p.As)
// The leading constant operand of the instruction is read from p.From.
m6 := uint32(c.vregoff(&p.From))
// Register fields are passed in the order p.To, p.Reg, third operand.
zVRRc(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), m6, m5, m4, asm)
}
}

View file

@ -1027,6 +1027,22 @@ func vop(as obj.As) (opcode, es, cs uint32) {
return op_VUPL, 1, 0
case AVUPLF:
return op_VUPL, 2, 0
// Floating point min/max. The V and W forms of a mnemonic return the same
// opcode and element size. The second result is 3 for the DB (float64)
// ops and 2 for the SB (float32) ops; the third result is always 0.
case AVFMAXDB:
return op_VFMAX, 3, 0
case AWFMAXDB:
return op_VFMAX, 3, 0
case AVFMAXSB:
return op_VFMAX, 2, 0
case AWFMAXSB:
return op_VFMAX, 2, 0
case AVFMINDB:
return op_VFMIN, 3, 0
case AWFMINDB:
return op_VFMIN, 3, 0
case AVFMINSB:
return op_VFMIN, 2, 0
case AWFMINSB:
return op_VFMIN, 2, 0
}
}
@ -1062,7 +1078,11 @@ func singleElementMask(as obj.As) uint32 {
AWFSQDB,
AWFSDB,
AWFTCIDB,
AWFIDB:
AWFIDB,
AWFMAXDB,
AWFMAXSB,
AWFMINDB,
AWFMINSB:
return 8
}
return 0

View file

@ -172,6 +172,7 @@ func Float64Min(a, b float64) float64 {
// riscv64:"FMIN"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
// s390x: "WFMINDB"
return min(a, b)
}
@ -182,6 +183,7 @@ func Float64Max(a, b float64) float64 {
// riscv64:"FMAX"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
// s390x: "WFMAXDB"
return max(a, b)
}
@ -192,6 +194,7 @@ func Float32Min(a, b float32) float32 {
// riscv64:"FMINS"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
// s390x: "WFMINSB"
return min(a, b)
}
@ -202,6 +205,7 @@ func Float32Max(a, b float32) float32 {
// riscv64:"FMAXS"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
// s390x: "WFMAXSB"
return max(a, b)
}