cmd/internal/obj/loong64: add {,X}VSLT.{B/H/W/V}{,U} instructions support

Go asm syntax:
	 VSLT{B,H,W,V}    V1,   V2, V3
	 VSLT{B,H,W,V}U   V1,   V2, V3
	 VSLT{B,H,W,V}    $-2,  V2, V3
	 VSLT{B,H,W,V}U   $8,   V2, V3
	XVSLT{B,H,W,V}    X1,   X2, X3
	XVSLT{B,H,W,V}U   X1,   X2, X3
	XVSLT{B,H,W,V}    $-16, X2, X3
	XVSLT{B,H,W,V}U   $31,  X2, X3

Equivalent platform assembler syntax:
	 vslt.{b,h,w,d}    v3, v2, v1
	 vslt.{b,h,w,d}u   v3, v2, v1
	 vslti.{b,h,w,d}   v3, v2, $-2
	 vslti.{b,h,w,d}u  v3, v2, $8
	xvslt.{b,h,w,d}    x3, x2, x1
	xvslt.{b,h,w,d}u   x3, x2, x1
	xvslti.{b,h,w,d}   x3, x2, $-16
	xvslti.{b,h,w,d}u  x3, x2, $31

Change-Id: Iccfb65c0c19b62d2c5ec279a077393c68e1bf7d4
Reviewed-on: https://go-review.googlesource.com/c/go/+/721620
Reviewed-by: Mark Freeman <markfreeman@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Guoqi Chen 2025-11-18 21:10:51 +08:00 committed by abner chenc
parent bd2b117c2c
commit cdba82c7d6
4 changed files with 159 additions and 0 deletions

View file

@ -597,6 +597,42 @@ lable2:
XVSEQV $15, X2, X4 // 44bc8176 XVSEQV $15, X2, X4 // 44bc8176
XVSEQV $-15, X2, X4 // 44c48176 XVSEQV $-15, X2, X4 // 44c48176
// VSLT{B,H,W,V}, XVSLT{B,H,W,V} instruction
VSLTB V1, V2, V3 // 43040670
VSLTH V1, V2, V3 // 43840670
VSLTW V1, V2, V3 // 43040770
VSLTV V1, V2, V3 // 43840770
XVSLTB X1, X2, X3 // 43040674
XVSLTH X1, X2, X3 // 43840674
XVSLTW X1, X2, X3 // 43040774
XVSLTV X1, X2, X3 // 43840774
VSLTB $1, V2, V3 // 43048672
VSLTH $16, V2, V3 // 43c08672
VSLTW $-16, V2, V3 // 43408772
VSLTV $-15, V2, V3 // 43c48772
XVSLTB $1, X2, X3 // 43048676
XVSLTH $16, X2, X3 // 43c08676
XVSLTW $-16, X2, X3 // 43408776
XVSLTV $-16, X2, X3 // 43c08776
// VSLT{B,H,W,V}U, XVSLT{B,H,W,V}U instruction
VSLTBU V1, V2, V3 // 43040870
VSLTHU V1, V2, V3 // 43840870
VSLTWU V1, V2, V3 // 43040970
VSLTVU V1, V2, V3 // 43840970
XVSLTBU X1, X2, X3 // 43040874
XVSLTHU X1, X2, X3 // 43840874
XVSLTWU X1, X2, X3 // 43040974
XVSLTVU X1, X2, X3 // 43840974
VSLTBU $0, V2, V3 // 43008872
VSLTHU $31, V2, V3 // 43fc8872
VSLTWU $16, V2, V3 // 43408972
VSLTVU $1, V2, V3 // 43848972
XVSLTBU $0, X2, X3 // 43008876
XVSLTHU $31, X2, X3 // 43fc8876
XVSLTWU $8, X2, X3 // 43208976
XVSLTVU $0, X2, X3 // 43808976
// VPCNT{B,H,W,V}, XVPCNT{B,H,W,V} instruction // VPCNT{B,H,W,V}, XVPCNT{B,H,W,V} instruction
VPCNTB V1, V2 // 22209c72 VPCNTB V1, V2 // 22209c72
VPCNTH V1, V2 // 22249c72 VPCNTH V1, V2 // 22249c72

View file

@ -912,6 +912,23 @@ const (
AVSEQV AVSEQV
AXVSEQV AXVSEQV
AVSLTB
AVSLTH
AVSLTW
AVSLTV
AVSLTBU
AVSLTHU
AVSLTWU
AVSLTVU
AXVSLTB
AXVSLTH
AXVSLTW
AXVSLTV
AXVSLTBU
AXVSLTHU
AXVSLTWU
AXVSLTVU
// LSX and LASX integer div and mod instructions // LSX and LASX integer div and mod instructions
AVDIVB AVDIVB
AVDIVH AVDIVH

View file

@ -400,6 +400,22 @@ var Anames = []string{
"XVSEQW", "XVSEQW",
"VSEQV", "VSEQV",
"XVSEQV", "XVSEQV",
"VSLTB",
"VSLTH",
"VSLTW",
"VSLTV",
"VSLTBU",
"VSLTHU",
"VSLTWU",
"VSLTVU",
"XVSLTB",
"XVSLTH",
"XVSLTW",
"XVSLTV",
"XVSLTBU",
"XVSLTHU",
"XVSLTWU",
"XVSLTVU",
"VDIVB", "VDIVB",
"VDIVH", "VDIVH",
"VDIVW", "VDIVW",

View file

@ -93,6 +93,14 @@ var optab = []Optab{
{AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
{AVSEQB, C_S5CON, C_VREG, C_NONE, C_VREG, C_NONE, 22, 4, 0, 0}, {AVSEQB, C_S5CON, C_VREG, C_NONE, C_VREG, C_NONE, 22, 4, 0, 0},
{AXVSEQB, C_S5CON, C_XREG, C_NONE, C_XREG, C_NONE, 22, 4, 0, 0}, {AXVSEQB, C_S5CON, C_XREG, C_NONE, C_XREG, C_NONE, 22, 4, 0, 0},
{AVSLTB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AXVSLTB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
{AVSLTB, C_S5CON, C_VREG, C_NONE, C_VREG, C_NONE, 22, 4, 0, 0},
{AXVSLTB, C_S5CON, C_XREG, C_NONE, C_XREG, C_NONE, 22, 4, 0, 0},
{AVSLTB, C_U5CON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
{AXVSLTB, C_U5CON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},
{AVANDV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVANDV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AVANDV, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVANDV, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
{AXVANDV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, {AXVANDV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
@ -1784,6 +1792,24 @@ func buildop(ctxt *obj.Link) {
opset(AXVSHUFW, r0) opset(AXVSHUFW, r0)
opset(AXVSHUFV, r0) opset(AXVSHUFV, r0)
case AVSLTB:
opset(AVSLTH, r0)
opset(AVSLTW, r0)
opset(AVSLTV, r0)
opset(AVSLTBU, r0)
opset(AVSLTHU, r0)
opset(AVSLTWU, r0)
opset(AVSLTVU, r0)
case AXVSLTB:
opset(AXVSLTH, r0)
opset(AXVSLTW, r0)
opset(AXVSLTV, r0)
opset(AXVSLTBU, r0)
opset(AXVSLTHU, r0)
opset(AXVSLTWU, r0)
opset(AXVSLTVU, r0)
case AVANDB: case AVANDB:
opset(AVORB, r0) opset(AVORB, r0)
opset(AVXORB, r0) opset(AVXORB, r0)
@ -3379,6 +3405,38 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0x0e003 << 15 // vseq.d return 0x0e003 << 15 // vseq.d
case AXVSEQV: case AXVSEQV:
return 0x0e803 << 15 // xvseq.d return 0x0e803 << 15 // xvseq.d
case AVSLTB:
return 0x0E00C << 15 // vslt.b
case AVSLTH:
return 0x0E00D << 15 // vslt.h
case AVSLTW:
return 0x0E00E << 15 // vslt.w
case AVSLTV:
return 0x0E00F << 15 // vslt.d
case AVSLTBU:
return 0x0E010 << 15 // vslt.bu
case AVSLTHU:
return 0x0E011 << 15 // vslt.hu
case AVSLTWU:
return 0x0E012 << 15 // vslt.wu
case AVSLTVU:
return 0x0E013 << 15 // vslt.du
case AXVSLTB:
return 0x0E80C << 15 // xvslt.b
case AXVSLTH:
return 0x0E80D << 15 // xvslt.h
case AXVSLTW:
return 0x0E80E << 15 // xvslt.w
case AXVSLTV:
return 0x0E80F << 15 // xvslt.d
case AXVSLTBU:
return 0x0E810 << 15 // xvslt.bu
case AXVSLTHU:
return 0x0E811 << 15 // xvslt.hu
case AXVSLTWU:
return 0x0E812 << 15 // xvslt.wu
case AXVSLTVU:
return 0x0E813 << 15 // xvslt.du
case AVANDV: case AVANDV:
return 0x0E24C << 15 // vand.v return 0x0E24C << 15 // vand.v
case AVORV: case AVORV:
@ -4399,6 +4457,38 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
return 0x0ED02 << 15 // xvseqi.w return 0x0ED02 << 15 // xvseqi.w
case AXVSEQV: case AXVSEQV:
return 0x0ED03 << 15 // xvseqi.d return 0x0ED03 << 15 // xvseqi.d
case AVSLTB:
return 0x0E50C << 15 // vslti.b
case AVSLTH:
return 0x0E50D << 15 // vslti.h
case AVSLTW:
return 0x0E50E << 15 // vslti.w
case AVSLTV:
return 0x0E50F << 15 // vslti.d
case AVSLTBU:
return 0x0E510 << 15 // vslti.bu
case AVSLTHU:
return 0x0E511 << 15 // vslti.hu
case AVSLTWU:
return 0x0E512 << 15 // vslti.wu
case AVSLTVU:
return 0x0E513 << 15 // vslti.du
case AXVSLTB:
return 0x0ED0C << 15 // xvslti.b
case AXVSLTH:
return 0x0ED0D << 15 // xvslti.h
case AXVSLTW:
return 0x0ED0E << 15 // xvslti.w
case AXVSLTV:
return 0x0ED0F << 15 // xvslti.d
case AXVSLTBU:
return 0x0ED10 << 15 // xvslti.bu
case AXVSLTHU:
return 0x0ED11 << 15 // xvslti.hu
case AXVSLTWU:
return 0x0ED12 << 15 // xvslti.wu
case AXVSLTVU:
return 0x0ED13 << 15 // xvslti.du
case AVROTRB: case AVROTRB:
return 0x1ca8<<18 | 0x1<<13 // vrotri.b return 0x1ca8<<18 | 0x1<<13 // vrotri.b
case AVROTRH: case AVROTRH: