From dadbac0c9ed3bd8f18dcb7e250f3f8f2808a4927 Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Wed, 29 Oct 2025 15:43:54 +0800 Subject: [PATCH] cmd/internal/obj/loong64: add VPERMI.W, XVPERMI.{W,V,Q} instruction support Go asm syntax: VPERMIW $0x1b, vj, vd XVPERMI{W,V,Q} $0x1b, xj, xd Equivalent platform assembler syntax: vpermi.w vd, vj, $0x1b xvpermi.{w,d,q} xd, xj, $0x1b Change-Id: Ie23b2fdd09b4c93801dc804913206f1c5a496268 Reviewed-on: https://go-review.googlesource.com/c/go/+/716800 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Meidan Li Reviewed-by: sophie zhao Reviewed-by: Michael Knyszek --- src/cmd/asm/internal/asm/testdata/loong64enc1.s | 6 ++++++ src/cmd/internal/obj/loong64/a.out.go | 5 +++++ src/cmd/internal/obj/loong64/anames.go | 4 ++++ src/cmd/internal/obj/loong64/asm.go | 12 ++++++++++++ src/cmd/internal/obj/loong64/doc.go | 17 +++++++++++++++++ 5 files changed, 44 insertions(+) diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 6e2a86969d5..ca86ef8e6c1 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -1017,6 +1017,12 @@ lable2: XVSHUF4IV $8, X1, X2 // 22209c77 XVSHUF4IV $15, X1, X2 // 223c9c77 + // VPERMIW, XVPERMI{W,V,Q} instructions + VPERMIW $0x1B, V1, V2 // VPERMIW $27, V1, V2 // 226ce473 + XVPERMIW $0x2B, X1, X2 // XVPERMIW $43, X1, X2 // 22ace477 + XVPERMIV $0x3B, X1, X2 // XVPERMIV $59, X1, X2 // 22ece877 + XVPERMIQ $0x4B, X1, X2 // XVPERMIQ $75, X1, X2 // 222ced77 + // [X]VSETEQZ.V, [X]VSETNEZ.V VSETEQV V1, FCC0 // 20989c72 VSETNEV V1, FCC0 // 209c9c72 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 3a676db922c..762dc338e3e 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -1115,6 +1115,11 @@ const ( AXVSHUF4IW AXVSHUF4IV + AVPERMIW + AXVPERMIW + AXVPERMIV + AXVPERMIQ + AVSETEQV AVSETNEV AVSETANYEQB diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 422ccbd9b0b..607e6063110 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -586,6 +586,10 @@ var Anames = []string{ "XVSHUF4IH", "XVSHUF4IW", "XVSHUF4IV", + "VPERMIW", + "XVPERMIW", + "XVPERMIV", + "XVPERMIQ", "VSETEQV", "VSETNEV", "VSETANYEQB", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 7eb5668d82e..8e2393bc1cd 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1778,6 +1778,7 @@ func buildop(ctxt *obj.Link) { opset(AVSHUF4IH, r0) opset(AVSHUF4IW, r0) opset(AVSHUF4IV, r0) + opset(AVPERMIW, r0) case AXVANDB: opset(AXVORB, r0) @@ -1787,6 +1788,9 @@ func buildop(ctxt *obj.Link) { opset(AXVSHUF4IH, r0) opset(AXVSHUF4IW, r0) opset(AXVSHUF4IV, r0) + opset(AXVPERMIW, r0) + opset(AXVPERMIV, r0) + opset(AXVPERMIQ, r0) case AVANDV: opset(AVORV, r0) @@ -4362,6 +4366,14 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x1de6 << 18 // xvshuf4i.w case AXVSHUF4IV: return 0x1de7 << 18 // xvshuf4i.d + case AVPERMIW: + return 0x1cf9 << 18 // vpermi.w + case AXVPERMIW: + return 0x1df9 << 18 // xvpermi.w + case AXVPERMIV: + return 0x1dfa << 18 // xvpermi.d + case AXVPERMIQ: + return 0x1dfb << 18 // xvpermi.q case AVBITCLRB: return 0x1CC4<<18 | 0x1<<13 // vbitclri.b case AVBITCLRH: diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go index f7e5a4fb427..45f75e6e703 100644 --- a/src/cmd/internal/obj/loong64/doc.go +++ b/src/cmd/internal/obj/loong64/doc.go @@ -229,6 +229,23 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate) VMOVQ 8(R4), V5.W4 | vldrepl.w v5, r4, $2 VMOVQ 8(R4), V5.V2 | vldrepl.d v5, r4, $1 +3.8 Vector permutation instruction + Instruction format: + VPERMIW ui8, Vj, Vd + + Mapping between Go and platform assembly: + Go assembly | platform assembly | semantics + VPERMIW ui8, Vj, Vd | vpermi.w vd, vj, ui8 | VR[vd].W[0] = VR[vj].W[ui8[1:0]], VR[vd].W[1] = VR[vj].W[ui8[3:2]], + | | VR[vd].W[2] = VR[vd].W[ui8[5:4]], VR[vd].W[3] = VR[vd].W[ui8[7:6]] + XVPERMIW ui8, Xj, Xd | xvpermi.w xd, xj, ui8 | XR[xd].W[0] = XR[xj].W[ui8[1:0]], XR[xd].W[1] = XR[xj].W[ui8[3:2]], + | | XR[xd].W[3] = XR[xd].W[ui8[7:6]], XR[xd].W[2] = XR[xd].W[ui8[5:4]], + | | XR[xd].W[4] = XR[xj].W[ui8[1:0]+4], XR[xd].W[5] = XR[xj].W[ui8[3:2]+4], + | | XR[xd].W[6] = XR[xd].W[ui8[5:4]+4], XR[xd].W[7] = XR[xd].W[ui8[7:6]+4] + XVPERMIV ui8, Xj, Xd | xvpermi.d xd, xj, ui8 | XR[xd].D[0] = XR[xj].D[ui8[1:0]], XR[xd].D[1] = XR[xj].D[ui8[3:2]], + | | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]] + XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]] + + # Special instruction encoding definition and description on LoongArch 1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased