cmd/internal/obj/loong64: add support for {,X}VSHUF.{B/H/W/V} instructions

Go asm syntax:
         VSHUFB             V1, V2, V3, V4
        XVSHUFB             X1, X2, X3, X4
         VSHUF{H/W/V}       V1, V2, V3
        XVSHUF{H/W/V}       X1, X2, X3

Equivalent platform assembler syntax:
         vshuf.b            v4, v3, v2, v1
        xvshuf.b            x4, x3, x2, x1
         vshuf.{h/w/d}      v3, v2, v1
        xvshuf.{h/w/d}      x3, x2, x1

Change-Id: I8983467495f587cf46083fd81cb024400c7dc2a7
Reviewed-on: https://go-review.googlesource.com/c/go/+/716804
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
This commit is contained in:
Guoqi Chen 2025-10-31 17:45:39 +08:00 committed by abner chenc
parent 2e2072561c
commit 8111104a21
4 changed files with 53 additions and 0 deletions

View file

@ -1023,6 +1023,16 @@ lable2:
XVSHUF4IV $8, X1, X2 // 22209c77
XVSHUF4IV $15, X1, X2 // 223c9c77
// [X]VSHUF.{B/H/W/V} instructions
VSHUFH V1, V2, V3 // 43847a71
VSHUFW V1, V2, V3 // 43047b71
VSHUFV V1, V2, V3 // 43847b71
XVSHUFH X1, X2, X3 // 43847a75
XVSHUFW X1, X2, X3 // 43047b75
XVSHUFV X1, X2, X3 // 43847b75
VSHUFB V1, V2, V3, V4 // 6488500d
XVSHUFB X1, X2, X3, X4 // 6488600d
// VPERMIW, XVPERMI{W,V,Q} instructions
VPERMIW $0x1B, V1, V2 // VPERMIW $27, V1, V2 // 226ce473
XVPERMIW $0x2B, X1, X2 // XVPERMIW $43, X1, X2 // 22ace477

View file

@ -1115,6 +1115,15 @@ const (
AXVSHUF4IW
AXVSHUF4IV
AVSHUFB
AVSHUFH
AVSHUFW
AVSHUFV
AXVSHUFB
AXVSHUFH
AXVSHUFW
AXVSHUFV
AVPERMIW
AXVPERMIW
AXVPERMIV

View file

@ -586,6 +586,14 @@ var Anames = []string{
"XVSHUF4IH",
"XVSHUF4IW",
"XVSHUF4IV",
"VSHUFB",
"VSHUFH",
"VSHUFW",
"VSHUFV",
"XVSHUFB",
"XVSHUFH",
"XVSHUFW",
"XVSHUFV",
"VPERMIW",
"XVPERMIW",
"XVPERMIV",

View file

@ -155,6 +155,8 @@ var optab = []Optab{
{AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 37, 4, 0, 0},
{AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 37, 4, 0, 0},
{AVSHUFB, C_VREG, C_VREG, C_VREG, C_VREG, C_NONE, 37, 4, 0, 0},
{AXVSHUFB, C_XREG, C_XREG, C_XREG, C_XREG, C_NONE, 37, 4, 0, 0},
{AFSEL, C_FCCREG, C_FREG, C_FREG, C_FREG, C_NONE, 33, 4, 0, 0},
{AFSEL, C_FCCREG, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0},
@ -1561,6 +1563,8 @@ func buildop(ctxt *obj.Link) {
AMOVWU,
AVMOVQ,
AXVMOVQ,
AVSHUFB,
AXVSHUFB,
ANEGW,
ANEGV,
AWORD,
@ -1698,6 +1702,9 @@ func buildop(ctxt *obj.Link) {
opset(AVMULD, r0)
opset(AVDIVF, r0)
opset(AVDIVD, r0)
opset(AVSHUFH, r0)
opset(AVSHUFW, r0)
opset(AVSHUFV, r0)
case AXVSEQB:
opset(AXVSEQH, r0)
@ -1771,6 +1778,9 @@ func buildop(ctxt *obj.Link) {
opset(AXVMULD, r0)
opset(AXVDIVF, r0)
opset(AXVDIVD, r0)
opset(AXVSHUFH, r0)
opset(AXVSHUFW, r0)
opset(AXVSHUFV, r0)
case AVANDB:
opset(AVORB, r0)
@ -3107,6 +3117,10 @@ func (c *ctxt0) oprrrr(a obj.As) uint32 {
return 0x8d << 20 // fnmsub.s
case AFNMSUBD:
return 0x8e << 20 // fnmsub.d
case AVSHUFB:
return 0x0D5 << 20 // vshuf.b
case AXVSHUFB:
return 0x0D6 << 20 // xvshuf.b
}
c.ctxt.Diag("bad rrrr opcode %v", a)
@ -3775,6 +3789,18 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0xea22 << 15 // xvbitrev.w
case AXVBITREVV:
return 0xea23 << 15 // xvbitrev.d
case AVSHUFH:
return 0x0E2F5 << 15 // vshuf.h
case AVSHUFW:
return 0x0E2F6 << 15 // vshuf.w
case AVSHUFV:
return 0x0E2F7 << 15 // vshuf.d
case AXVSHUFH:
return 0x0EAF5 << 15 // xvshuf.h
case AXVSHUFW:
return 0x0EAF6 << 15 // xvshuf.w
case AXVSHUFV:
return 0x0EAF7 << 15 // xvshuf.d
}
if a < 0 {