cmd/internal/obj/loong64: add {,x}vmadd series instructions support

Go asm syntax:
	  VMADD{B, H, W, V}                         V1, V2, V3
	  VMSUB{B, H, W, V}                         V1, V2, V3
	 XVMADD{B, H, W, V}                         X1, X2, X3
	 XVMSUB{B, H, W, V}                         X1, X2, X3
	 VMADDWEV{HB, WH, VW, QV}{,U}               V1, V2, V3
	 VMADDWOD{HB, WH, VW, QV}{,U}               V1, V2, V3
	XVMADDWEV{HB, WH, VW, QV}{,U}               X1, X2, X3
	XVMADDWOD{HB, WH, VW, QV}{,U}               X1, X2, X3
	 VMADDWEV{HBUB, WHUH, VWUW, QVUV}           V1, V2, V3
	 VMADDWOD{HBUB, WHUH, VWUW, QVUV}           V1, V2, V3
	XVMADDWEV{HBUB, WHUH, VWUW, QVUV}           X1, X2, X3
	XVMADDWOD{HBUB, WHUH, VWUW, QVUV}           X1, X2, X3

Equivalent platform assembler syntax:
	 vmadd.{b,h,w,d}                            v3, v2, v1
	 vmsub.{b,h,w,d}                            v3, v2, v1
	xvmadd.{b,h,w,d}                            x3, x2, x1
	xvmsub.{b,h,w,d}                            x3, x2, x1
	 vmaddwev.{h.b, w.h, d.w, q.d}{,u}          v3, v2, v1
	 vmaddwod.{h.b, w.h, d.w, q.d}{,u}          v3, v2, v1
	xvmaddwev.{h.b, w.h, d.w, q.d}{,u}          x3, x2, x1
	xvmaddwod.{h.b, w.h, d.w, q.d}{,u}          x3, x2, x1
	 vmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  v3, v2, v1
	 vmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  v3, v2, v1
	xvmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  x3, x2, x1
	xvmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  x3, x2, x1

Change-Id: I2f4aae51045e1596d4744e525a1589586065cf8e
Reviewed-on: https://go-review.googlesource.com/c/go/+/724200
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: abner chenc <chenguoqi@loongson.cn>
This commit is contained in:
Guoqi Chen 2025-11-24 20:19:06 +08:00 committed by Gopher Robot
parent c0f02c11ff
commit e0a4dffb0c
4 changed files with 400 additions and 0 deletions

View file

@ -1163,6 +1163,78 @@ lable2:
XVSUBWODVWU X1, X2, X3 // 43043574 XVSUBWODVWU X1, X2, X3 // 43043574
XVSUBWODQVU X1, X2, X3 // 43843574 XVSUBWODQVU X1, X2, X3 // 43843574
// [X]VMADD.{B/H/W/D}, [X]VMSUB.{B/H/W/D} instructions
VMADDB V1, V2, V3 // 4304a870
VMADDH V1, V2, V3 // 4384a870
VMADDW V1, V2, V3 // 4304a970
VMADDV V1, V2, V3 // 4384a970
VMSUBB V1, V2, V3 // 4304aa70
VMSUBH V1, V2, V3 // 4384aa70
VMSUBW V1, V2, V3 // 4304ab70
VMSUBV V1, V2, V3 // 4384ab70
XVMADDB X1, X2, X3 // 4304a874
XVMADDH X1, X2, X3 // 4384a874
XVMADDW X1, X2, X3 // 4304a974
XVMADDV X1, X2, X3 // 4384a974
XVMSUBB X1, X2, X3 // 4304aa74
XVMSUBH X1, X2, X3 // 4384aa74
XVMSUBW X1, X2, X3 // 4304ab74
XVMSUBV X1, X2, X3 // 4384ab74
// [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D} instructions
VMADDWEVHB V1, V2, V3 // 4304ac70
VMADDWEVWH V1, V2, V3 // 4384ac70
VMADDWEVVW V1, V2, V3 // 4304ad70
VMADDWEVQV V1, V2, V3 // 4384ad70
VMADDWODHB V1, V2, V3 // 4304ae70
VMADDWODWH V1, V2, V3 // 4384ae70
VMADDWODVW V1, V2, V3 // 4304af70
VMADDWODQV V1, V2, V3 // 4384af70
XVMADDWEVHB X1, X2, X3 // 4304ac74
XVMADDWEVWH X1, X2, X3 // 4384ac74
XVMADDWEVVW X1, X2, X3 // 4304ad74
XVMADDWEVQV X1, X2, X3 // 4384ad74
XVMADDWODHB X1, X2, X3 // 4304ae74
XVMADDWODWH X1, X2, X3 // 4384ae74
XVMADDWODVW X1, X2, X3 // 4304af74
XVMADDWODQV X1, X2, X3 // 4384af74
// [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}U instructions
VMADDWEVHBU V1, V2, V3 // 4304b470
VMADDWEVWHU V1, V2, V3 // 4384b470
VMADDWEVVWU V1, V2, V3 // 4304b570
VMADDWEVQVU V1, V2, V3 // 4384b570
VMADDWODHBU V1, V2, V3 // 4304b670
VMADDWODWHU V1, V2, V3 // 4384b670
VMADDWODVWU V1, V2, V3 // 4304b770
VMADDWODQVU V1, V2, V3 // 4384b770
XVMADDWEVHBU X1, X2, X3 // 4304b474
XVMADDWEVWHU X1, X2, X3 // 4384b474
XVMADDWEVVWU X1, X2, X3 // 4304b574
XVMADDWEVQVU X1, X2, X3 // 4384b574
XVMADDWODHBU X1, X2, X3 // 4304b674
XVMADDWODWHU X1, X2, X3 // 4384b674
XVMADDWODVWU X1, X2, X3 // 4304b774
XVMADDWODQVU X1, X2, X3 // 4384b774
// [X]VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D} instructions
VMADDWEVHBUB V1, V2, V3 // 4304bc70
VMADDWEVWHUH V1, V2, V3 // 4384bc70
VMADDWEVVWUW V1, V2, V3 // 4304bd70
VMADDWEVQVUV V1, V2, V3 // 4384bd70
VMADDWODHBUB V1, V2, V3 // 4304be70
VMADDWODWHUH V1, V2, V3 // 4384be70
VMADDWODVWUW V1, V2, V3 // 4304bf70
VMADDWODQVUV V1, V2, V3 // 4384bf70
XVMADDWEVHBUB X1, X2, X3 // 4304bc74
XVMADDWEVWHUH X1, X2, X3 // 4384bc74
XVMADDWEVVWUW X1, X2, X3 // 4304bd74
XVMADDWEVQVUV X1, X2, X3 // 4384bd74
XVMADDWODHBUB X1, X2, X3 // 4304be74
XVMADDWODWHUH X1, X2, X3 // 4384be74
XVMADDWODVWUW X1, X2, X3 // 4304bf74
XVMADDWODQVUV X1, X2, X3 // 4384bf74
// [X]VSHUF4I.{B/H/W/D} instructions // [X]VSHUF4I.{B/H/W/D} instructions
VSHUF4IB $0, V2, V1 // 41009073 VSHUF4IB $0, V2, V1 // 41009073
VSHUF4IB $16, V2, V1 // 41409073 VSHUF4IB $16, V2, V1 // 41409073

View file

@ -1227,6 +1227,78 @@ const (
AXVSUBWODVWU AXVSUBWODVWU
AXVSUBWODQVU AXVSUBWODQVU
AVMADDB
AVMADDH
AVMADDW
AVMADDV
AVMSUBB
AVMSUBH
AVMSUBW
AVMSUBV
AXVMADDB
AXVMADDH
AXVMADDW
AXVMADDV
AXVMSUBB
AXVMSUBH
AXVMSUBW
AXVMSUBV
AVMADDWEVHB
AVMADDWEVWH
AVMADDWEVVW
AVMADDWEVQV
AVMADDWODHB
AVMADDWODWH
AVMADDWODVW
AVMADDWODQV
AVMADDWEVHBU
AVMADDWEVWHU
AVMADDWEVVWU
AVMADDWEVQVU
AVMADDWODHBU
AVMADDWODWHU
AVMADDWODVWU
AVMADDWODQVU
AVMADDWEVHBUB
AVMADDWEVWHUH
AVMADDWEVVWUW
AVMADDWEVQVUV
AVMADDWODHBUB
AVMADDWODWHUH
AVMADDWODVWUW
AVMADDWODQVUV
AXVMADDWEVHB
AXVMADDWEVWH
AXVMADDWEVVW
AXVMADDWEVQV
AXVMADDWODHB
AXVMADDWODWH
AXVMADDWODVW
AXVMADDWODQV
AXVMADDWEVHBU
AXVMADDWEVWHU
AXVMADDWEVVWU
AXVMADDWEVQVU
AXVMADDWODHBU
AXVMADDWODWHU
AXVMADDWODVWU
AXVMADDWODQVU
AXVMADDWEVHBUB
AXVMADDWEVWHUH
AXVMADDWEVVWUW
AXVMADDWEVQVUV
AXVMADDWODHBUB
AXVMADDWODWHUH
AXVMADDWODVWUW
AXVMADDWODQVUV
AVSHUF4IB AVSHUF4IB
AVSHUF4IH AVSHUF4IH
AVSHUF4IW AVSHUF4IW

View file

@ -695,6 +695,70 @@ var Anames = []string{
"XVSUBWODWHU", "XVSUBWODWHU",
"XVSUBWODVWU", "XVSUBWODVWU",
"XVSUBWODQVU", "XVSUBWODQVU",
"VMADDB",
"VMADDH",
"VMADDW",
"VMADDV",
"VMSUBB",
"VMSUBH",
"VMSUBW",
"VMSUBV",
"XVMADDB",
"XVMADDH",
"XVMADDW",
"XVMADDV",
"XVMSUBB",
"XVMSUBH",
"XVMSUBW",
"XVMSUBV",
"VMADDWEVHB",
"VMADDWEVWH",
"VMADDWEVVW",
"VMADDWEVQV",
"VMADDWODHB",
"VMADDWODWH",
"VMADDWODVW",
"VMADDWODQV",
"VMADDWEVHBU",
"VMADDWEVWHU",
"VMADDWEVVWU",
"VMADDWEVQVU",
"VMADDWODHBU",
"VMADDWODWHU",
"VMADDWODVWU",
"VMADDWODQVU",
"VMADDWEVHBUB",
"VMADDWEVWHUH",
"VMADDWEVVWUW",
"VMADDWEVQVUV",
"VMADDWODHBUB",
"VMADDWODWHUH",
"VMADDWODVWUW",
"VMADDWODQVUV",
"XVMADDWEVHB",
"XVMADDWEVWH",
"XVMADDWEVVW",
"XVMADDWEVQV",
"XVMADDWODHB",
"XVMADDWODWH",
"XVMADDWODVW",
"XVMADDWODQV",
"XVMADDWEVHBU",
"XVMADDWEVWHU",
"XVMADDWEVVWU",
"XVMADDWEVQVU",
"XVMADDWODHBU",
"XVMADDWODWHU",
"XVMADDWODVWU",
"XVMADDWODQVU",
"XVMADDWEVHBUB",
"XVMADDWEVWHUH",
"XVMADDWEVVWUW",
"XVMADDWEVQVUV",
"XVMADDWODHBUB",
"XVMADDWODWHUH",
"XVMADDWODVWUW",
"XVMADDWODQVUV",
"VSHUF4IB", "VSHUF4IB",
"VSHUF4IH", "VSHUF4IH",
"VSHUF4IW", "VSHUF4IW",

View file

@ -1830,6 +1830,38 @@ func buildop(ctxt *obj.Link) {
opset(AVSUBWODWHU, r0) opset(AVSUBWODWHU, r0)
opset(AVSUBWODVWU, r0) opset(AVSUBWODVWU, r0)
opset(AVSUBWODQVU, r0) opset(AVSUBWODQVU, r0)
opset(AVMADDB, r0)
opset(AVMADDH, r0)
opset(AVMADDW, r0)
opset(AVMADDV, r0)
opset(AVMSUBB, r0)
opset(AVMSUBH, r0)
opset(AVMSUBW, r0)
opset(AVMSUBV, r0)
opset(AVMADDWEVHB, r0)
opset(AVMADDWEVWH, r0)
opset(AVMADDWEVVW, r0)
opset(AVMADDWEVQV, r0)
opset(AVMADDWODHB, r0)
opset(AVMADDWODWH, r0)
opset(AVMADDWODVW, r0)
opset(AVMADDWODQV, r0)
opset(AVMADDWEVHBU, r0)
opset(AVMADDWEVWHU, r0)
opset(AVMADDWEVVWU, r0)
opset(AVMADDWEVQVU, r0)
opset(AVMADDWODHBU, r0)
opset(AVMADDWODWHU, r0)
opset(AVMADDWODVWU, r0)
opset(AVMADDWODQVU, r0)
opset(AVMADDWEVHBUB, r0)
opset(AVMADDWEVWHUH, r0)
opset(AVMADDWEVVWUW, r0)
opset(AVMADDWEVQVUV, r0)
opset(AVMADDWODHBUB, r0)
opset(AVMADDWODWHUH, r0)
opset(AVMADDWODVWUW, r0)
opset(AVMADDWODQVUV, r0)
case AXVSLTB: case AXVSLTB:
opset(AXVSLTH, r0) opset(AXVSLTH, r0)
@ -1871,6 +1903,38 @@ func buildop(ctxt *obj.Link) {
opset(AXVSUBWODWHU, r0) opset(AXVSUBWODWHU, r0)
opset(AXVSUBWODVWU, r0) opset(AXVSUBWODVWU, r0)
opset(AXVSUBWODQVU, r0) opset(AXVSUBWODQVU, r0)
opset(AXVMADDB, r0)
opset(AXVMADDH, r0)
opset(AXVMADDW, r0)
opset(AXVMADDV, r0)
opset(AXVMSUBB, r0)
opset(AXVMSUBH, r0)
opset(AXVMSUBW, r0)
opset(AXVMSUBV, r0)
opset(AXVMADDWEVHB, r0)
opset(AXVMADDWEVWH, r0)
opset(AXVMADDWEVVW, r0)
opset(AXVMADDWEVQV, r0)
opset(AXVMADDWODHB, r0)
opset(AXVMADDWODWH, r0)
opset(AXVMADDWODVW, r0)
opset(AXVMADDWODQV, r0)
opset(AXVMADDWEVHBU, r0)
opset(AXVMADDWEVWHU, r0)
opset(AXVMADDWEVVWU, r0)
opset(AXVMADDWEVQVU, r0)
opset(AXVMADDWODHBU, r0)
opset(AXVMADDWODWHU, r0)
opset(AXVMADDWODVWU, r0)
opset(AXVMADDWODQVU, r0)
opset(AXVMADDWEVHBUB, r0)
opset(AXVMADDWEVWHUH, r0)
opset(AXVMADDWEVVWUW, r0)
opset(AXVMADDWEVQVUV, r0)
opset(AXVMADDWODHBUB, r0)
opset(AXVMADDWODWHUH, r0)
opset(AXVMADDWODVWUW, r0)
opset(AXVMADDWODQVUV, r0)
case AVANDB: case AVANDB:
opset(AVORB, r0) opset(AVORB, r0)
@ -3811,6 +3875,134 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0x0E86A << 15 // xvsubwod.d.wu return 0x0E86A << 15 // xvsubwod.d.wu
case AXVSUBWODQVU: case AXVSUBWODQVU:
return 0x0E86B << 15 // xvsubwod.q.du return 0x0E86B << 15 // xvsubwod.q.du
case AVMADDB:
return 0x0E150 << 15 // vmadd.b
case AVMADDH:
return 0x0E151 << 15 // vmadd.h
case AVMADDW:
return 0x0E152 << 15 // vmadd.w
case AVMADDV:
return 0x0E153 << 15 // vmadd.d
case AVMSUBB:
return 0x0E154 << 15 // vmsub.b
case AVMSUBH:
return 0x0E155 << 15 // vmsub.h
case AVMSUBW:
return 0x0E156 << 15 // vmsub.w
case AVMSUBV:
return 0x0E157 << 15 // vmsub.d
case AXVMADDB:
return 0x0E950 << 15 // xvmadd.b
case AXVMADDH:
return 0x0E951 << 15 // xvmadd.h
case AXVMADDW:
return 0x0E952 << 15 // xvmadd.w
case AXVMADDV:
return 0x0E953 << 15 // xvmadd.d
case AXVMSUBB:
return 0x0E954 << 15 // xvmsub.b
case AXVMSUBH:
return 0x0E955 << 15 // xvmsub.h
case AXVMSUBW:
return 0x0E956 << 15 // xvmsub.w
case AXVMSUBV:
return 0x0E957 << 15 // xvmsub.d
case AVMADDWEVHB:
return 0x0E158 << 15 // vmaddwev.h.b
case AVMADDWEVWH:
return 0x0E159 << 15 // vmaddwev.w.h
case AVMADDWEVVW:
return 0x0E15A << 15 // vmaddwev.d.w
case AVMADDWEVQV:
return 0x0E15B << 15 // vmaddwev.q.d
case AVMADDWODHB:
return 0x0E15C << 15 // vmaddwod.h.b
case AVMADDWODWH:
return 0x0E15D << 15 // vmaddwod.w.h
case AVMADDWODVW:
return 0x0E15E << 15 // vmaddwod.d.w
case AVMADDWODQV:
return 0x0E15F << 15 // vmaddwod.q.d
case AVMADDWEVHBU:
return 0x0E168 << 15 // vmaddwev.h.bu
case AVMADDWEVWHU:
return 0x0E169 << 15 // vmaddwev.w.hu
case AVMADDWEVVWU:
return 0x0E16A << 15 // vmaddwev.d.wu
case AVMADDWEVQVU:
return 0x0E16B << 15 // vmaddwev.q.du
case AVMADDWODHBU:
return 0x0E16C << 15 // vmaddwod.h.bu
case AVMADDWODWHU:
return 0x0E16D << 15 // vmaddwod.w.hu
case AVMADDWODVWU:
return 0x0E16E << 15 // vmaddwod.d.wu
case AVMADDWODQVU:
return 0x0E16F << 15 // vmaddwod.q.du
case AVMADDWEVHBUB:
return 0x0E178 << 15 // vmaddwev.h.bu.b
case AVMADDWEVWHUH:
return 0x0E179 << 15 // vmaddwev.w.hu.h
case AVMADDWEVVWUW:
return 0x0E17A << 15 // vmaddwev.d.wu.w
case AVMADDWEVQVUV:
return 0x0E17B << 15 // vmaddwev.q.du.d
case AVMADDWODHBUB:
return 0x0E17C << 15 // vmaddwod.h.bu.b
case AVMADDWODWHUH:
return 0x0E17D << 15 // vmaddwod.w.hu.h
case AVMADDWODVWUW:
return 0x0E17E << 15 // vmaddwod.d.wu.w
case AVMADDWODQVUV:
return 0x0E17F << 15 // vmaddwod.q.du.d
case AXVMADDWEVHB:
return 0x0E958 << 15 // xvmaddwev.h.b
case AXVMADDWEVWH:
return 0x0E959 << 15 // xvmaddwev.w.h
case AXVMADDWEVVW:
return 0x0E95A << 15 // xvmaddwev.d.w
case AXVMADDWEVQV:
return 0x0E95B << 15 // xvmaddwev.q.d
case AXVMADDWODHB:
return 0x0E95C << 15 // xvmaddwod.h.b
case AXVMADDWODWH:
return 0x0E95D << 15 // xvmaddwod.w.h
case AXVMADDWODVW:
return 0x0E95E << 15 // xvmaddwod.d.w
case AXVMADDWODQV:
return 0x0E95F << 15 // xvmaddwod.q.d
case AXVMADDWEVHBU:
return 0x0E968 << 15 // xvmaddwev.h.bu
case AXVMADDWEVWHU:
return 0x0E969 << 15 // xvmaddwev.w.hu
case AXVMADDWEVVWU:
return 0x0E96A << 15 // xvmaddwev.d.wu
case AXVMADDWEVQVU:
return 0x0E96B << 15 // xvmaddwev.q.du
case AXVMADDWODHBU:
return 0x0E96C << 15 // xvmaddwod.h.bu
case AXVMADDWODWHU:
return 0x0E96D << 15 // xvmaddwod.w.hu
case AXVMADDWODVWU:
return 0x0E96E << 15 // xvmaddwod.d.wu
case AXVMADDWODQVU:
return 0x0E96F << 15 // xvmaddwod.q.du
case AXVMADDWEVHBUB:
return 0x0E978 << 15 // xvmaddwev.h.bu.b
case AXVMADDWEVWHUH:
return 0x0E979 << 15 // xvmaddwev.w.hu.h
case AXVMADDWEVVWUW:
return 0x0E97A << 15 // xvmaddwev.d.wu.w
case AXVMADDWEVQVUV:
return 0x0E97B << 15 // xvmaddwev.q.du.d
case AXVMADDWODHBUB:
return 0x0E97C << 15 // xvmaddwod.h.bu.b
case AXVMADDWODWHUH:
return 0x0E97D << 15 // xvmaddwod.w.hu.h
case AXVMADDWODVWUW:
return 0x0E97E << 15 // xvmaddwod.d.wu.w
case AXVMADDWODQVUV:
return 0x0E97F << 15 // xvmaddwod.q.du.d
case AVSLLB: case AVSLLB:
return 0xe1d0 << 15 // vsll.b return 0xe1d0 << 15 // vsll.b
case AVSLLH: case AVSLLH: