cmd/internal/obj/loong64: add {V,XV}{ADD/SUB}.{B,H,W,D,Q} and {V,XV}{ADD/SUB}.{B,H,W,D}Uinstructions support

Go asm syntax:
	 V{ADD/SUB}{B,H,W,V,Q}		VK, VJ, VD
	XV{ADD/SUB}{B,H,W,V,Q}		XK, XJ, XD
	 V{ADD/SUB}{B,H,W,V}U		$1, VJ, VD
	XV{ADD/SUB}{B,H,W,V}U		$1, XJ, XD

Equivalent platform assembler syntax:
	 v{add/sub}.{b,h,w,d,q}		vd, vj, vk
	xv{add/sub}.{b,h,w,d,q}		xd, xj, xk
	 v{add/sub}i.{b,h,w,d}u		vd, vj, $1
	xv{add/sub}i.{b,h,w,d}u		xd, xj, $1

Change-Id: Ia1ef0bc062f4403bb0b1514c2cf1c0264f5d22ee
Reviewed-on: https://go-review.googlesource.com/c/go/+/637795
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Xiaolin Zhao 2024-12-19 21:07:42 +08:00 committed by abner chenc
parent 645ea53019
commit 39d7a49736
4 changed files with 229 additions and 0 deletions

View file

@ -679,3 +679,43 @@ lable2:
XVROTRV $0, X2, X1 // 4100a176 XVROTRV $0, X2, X1 // 4100a176
XVROTRV $63, X2, X1 // 41fca176 XVROTRV $63, X2, X1 // 41fca176
XVROTRV $52, X2 // 42d0a176 XVROTRV $52, X2 // 42d0a176
// [X]VADD{B,H,W,V,Q}, [X]VSUB{B,H,W,V,Q} instructions
VADDB V1, V2, V3 // 43040a70
VADDH V1, V2, V3 // 43840a70
VADDW V1, V2, V3 // 43040b70
VADDV V1, V2, V3 // 43840b70
VADDQ V1, V2, V3 // 43042d71
VSUBB V1, V2, V3 // 43040c70
VSUBH V1, V2, V3 // 43840c70
VSUBW V1, V2, V3 // 43040d70
VSUBV V1, V2, V3 // 43840d70
VSUBQ V1, V2, V3 // 43842d71
XVADDB X3, X2, X1 // 410c0a74
XVADDH X3, X2, X1 // 418c0a74
XVADDW X3, X2, X1 // 410c0b74
XVADDV X3, X2, X1 // 418c0b74
XVADDQ X3, X2, X1 // 410c2d75
XVSUBB X3, X2, X1 // 410c0c74
XVSUBH X3, X2, X1 // 418c0c74
XVSUBW X3, X2, X1 // 410c0d74
XVSUBV X3, X2, X1 // 418c0d74
XVSUBQ X3, X2, X1 // 418c2d75
// [X]VADD{B,H,W,V}U, [X]VSUB{B,H,W,V}U instructions
VADDBU $1, V2, V1 // 41048a72
VADDHU $2, V2, V1 // 41888a72
VADDWU $3, V2, V1 // 410c8b72
VADDVU $4, V2, V1 // 41908b72
VSUBBU $5, V2, V1 // 41148c72
VSUBHU $6, V2, V1 // 41988c72
VSUBWU $7, V2, V1 // 411c8d72
VSUBVU $8, V2, V1 // 41a08d72
XVADDBU $9, X1, X2 // 22248a76
XVADDHU $10, X1, X2 // 22a88a76
XVADDWU $11, X1, X2 // 222c8b76
XVADDVU $12, X1, X2 // 22b08b76
XVSUBBU $13, X1, X2 // 22348c76
XVSUBHU $14, X1, X2 // 22b88c76
XVSUBWU $15, X1, X2 // 223c8d76
XVSUBVU $16, X1, X2 // 22c08d76

View file

@ -745,6 +745,44 @@ const (
AVMOVQ AVMOVQ
AXVMOVQ AXVMOVQ
// LSX and LASX arithmetic instructions
AVADDB
AVADDH
AVADDW
AVADDV
AVADDQ
AXVADDB
AXVADDH
AXVADDW
AXVADDV
AXVADDQ
AVSUBB
AVSUBH
AVSUBW
AVSUBV
AVSUBQ
AXVSUBB
AXVSUBH
AXVSUBW
AXVSUBV
AXVSUBQ
AVADDBU
AVADDHU
AVADDWU
AVADDVU
AVSUBBU
AVSUBHU
AVSUBWU
AVSUBVU
AXVADDBU
AXVADDHU
AXVADDWU
AXVADDVU
AXVSUBBU
AXVSUBHU
AXVSUBWU
AXVSUBVU
// LSX and LASX Bit-manipulation Instructions // LSX and LASX Bit-manipulation Instructions
AVPCNTB AVPCNTB
AVPCNTH AVPCNTH

View file

@ -261,6 +261,42 @@ var Anames = []string{
"FTINTRNEVD", "FTINTRNEVD",
"VMOVQ", "VMOVQ",
"XVMOVQ", "XVMOVQ",
"VADDB",
"VADDH",
"VADDW",
"VADDV",
"VADDQ",
"XVADDB",
"XVADDH",
"XVADDW",
"XVADDV",
"XVADDQ",
"VSUBB",
"VSUBH",
"VSUBW",
"VSUBV",
"VSUBQ",
"XVSUBB",
"XVSUBH",
"XVSUBW",
"XVSUBV",
"XVSUBQ",
"VADDBU",
"VADDHU",
"VADDWU",
"VADDVU",
"VSUBBU",
"VSUBHU",
"VSUBWU",
"VSUBVU",
"XVADDBU",
"XVADDHU",
"XVADDWU",
"XVADDVU",
"XVSUBBU",
"XVSUBHU",
"XVSUBWU",
"XVSUBVU",
"VPCNTB", "VPCNTB",
"VPCNTH", "VPCNTH",
"VPCNTW", "VPCNTW",

View file

@ -89,6 +89,11 @@ var optab = []Optab{
{AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
{AVADDB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AVADDB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AXVADDB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
{AXVADDB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
{AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AVSLLB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVSLLB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, {AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
@ -1550,6 +1555,28 @@ func buildop(ctxt *obj.Link) {
opset(AXVPCNTW, r0) opset(AXVPCNTW, r0)
opset(AXVPCNTV, r0) opset(AXVPCNTV, r0)
case AVADDB:
opset(AVADDH, r0)
opset(AVADDW, r0)
opset(AVADDV, r0)
opset(AVADDQ, r0)
opset(AVSUBB, r0)
opset(AVSUBH, r0)
opset(AVSUBW, r0)
opset(AVSUBV, r0)
opset(AVSUBQ, r0)
case AXVADDB:
opset(AXVADDH, r0)
opset(AXVADDW, r0)
opset(AXVADDV, r0)
opset(AXVADDQ, r0)
opset(AXVSUBB, r0)
opset(AXVSUBH, r0)
opset(AXVSUBW, r0)
opset(AXVSUBV, r0)
opset(AXVSUBQ, r0)
case AVSLLB: case AVSLLB:
opset(AVSRLB, r0) opset(AVSRLB, r0)
opset(AVSRAB, r0) opset(AVSRAB, r0)
@ -1574,11 +1601,27 @@ func buildop(ctxt *obj.Link) {
opset(AVSRLW, r0) opset(AVSRLW, r0)
opset(AVSRAW, r0) opset(AVSRAW, r0)
opset(AVROTRW, r0) opset(AVROTRW, r0)
opset(AVADDBU, r0)
opset(AVADDHU, r0)
opset(AVADDWU, r0)
opset(AVADDVU, r0)
opset(AVSUBBU, r0)
opset(AVSUBHU, r0)
opset(AVSUBWU, r0)
opset(AVSUBVU, r0)
case AXVSLLW: case AXVSLLW:
opset(AXVSRLW, r0) opset(AXVSRLW, r0)
opset(AXVSRAW, r0) opset(AXVSRAW, r0)
opset(AXVROTRW, r0) opset(AXVROTRW, r0)
opset(AXVADDBU, r0)
opset(AXVADDHU, r0)
opset(AXVADDWU, r0)
opset(AXVADDVU, r0)
opset(AXVSUBBU, r0)
opset(AXVSUBHU, r0)
opset(AXVSUBWU, r0)
opset(AXVSUBVU, r0)
case AVSLLV: case AVSLLV:
opset(AVSRLV, r0) opset(AVSRLV, r0)
@ -2779,6 +2822,46 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0xe9de << 15 // xvrotr.w return 0xe9de << 15 // xvrotr.w
case AXVROTRV: case AXVROTRV:
return 0xe9df << 15 // xvrotr.d return 0xe9df << 15 // xvrotr.d
case AVADDB:
return 0xe014 << 15 // vadd.b
case AVADDH:
return 0xe015 << 15 // vadd.h
case AVADDW:
return 0xe016 << 15 // vadd.w
case AVADDV:
return 0xe017 << 15 // vadd.d
case AVADDQ:
return 0xe25a << 15 // vadd.q
case AVSUBB:
return 0xe018 << 15 // vsub.b
case AVSUBH:
return 0xe019 << 15 // vsub.h
case AVSUBW:
return 0xe01a << 15 // vsub.w
case AVSUBV:
return 0xe01b << 15 // vsub.d
case AVSUBQ:
return 0xe25b << 15 // vsub.q
case AXVADDB:
return 0xe814 << 15 // xvadd.b
case AXVADDH:
return 0xe815 << 15 // xvadd.h
case AXVADDW:
return 0xe816 << 15 // xvadd.w
case AXVADDV:
return 0xe817 << 15 // xvadd.d
case AXVADDQ:
return 0xea5a << 15 // xvadd.q
case AXVSUBB:
return 0xe818 << 15 // xvsub.b
case AXVSUBH:
return 0xe819 << 15 // xvsub.h
case AXVSUBW:
return 0xe81a << 15 // xvsub.w
case AXVSUBV:
return 0xe81b << 15 // xvsub.d
case AXVSUBQ:
return 0xea5b << 15 // xvsub.q
} }
if a < 0 { if a < 0 {
@ -3170,6 +3253,38 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
return 0x1dcd<<18 | 0x1<<15 // xvsrai.w return 0x1dcd<<18 | 0x1<<15 // xvsrai.w
case AXVSRAV: case AXVSRAV:
return 0x1dcd<<18 | 0x1<<16 // xvsrai.d return 0x1dcd<<18 | 0x1<<16 // xvsrai.d
case AVADDBU:
return 0xe514 << 15 // vaddi.bu
case AVADDHU:
return 0xe515 << 15 // vaddi.hu
case AVADDWU:
return 0xe516 << 15 // vaddi.wu
case AVADDVU:
return 0xe517 << 15 // vaddi.du
case AVSUBBU:
return 0xe518 << 15 // vsubi.bu
case AVSUBHU:
return 0xe519 << 15 // vsubi.hu
case AVSUBWU:
return 0xe51a << 15 // vsubi.wu
case AVSUBVU:
return 0xe51b << 15 // vsubi.du
case AXVADDBU:
return 0xed14 << 15 // xvaddi.bu
case AXVADDHU:
return 0xed15 << 15 // xvaddi.hu
case AXVADDWU:
return 0xed16 << 15 // xvaddi.wu
case AXVADDVU:
return 0xed17 << 15 // xvaddi.du
case AXVSUBBU:
return 0xed18 << 15 // xvsubi.bu
case AXVSUBHU:
return 0xed19 << 15 // xvsubi.hu
case AXVSUBWU:
return 0xed1a << 15 // xvsubi.wu
case AXVSUBVU:
return 0xed1b << 15 // xvsubi.du
} }
if a < 0 { if a < 0 {