cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64

Make use of compressed instructions on riscv64 - add a compress pass to the end of the assembler, which replaces non-compressed instructions with compressed alternatives if possible. Provide a `compressinstructions` compiler and assembler debug flag, such that the compression pass can be disabled via `-asmflags=all=-d=compressinstructions=0` and `-gcflags=all=-d=compressinstructions=0`. Note that this does not prevent the explicit use of compressed instructions via assembly. Note that this does not make use of compressed control transfer instructions - this will be implemented in later changes. Reduces the text size of a hello world binary by ~121KB and reduces the text size of the go binary on riscv64 by ~1.21MB (between 8-10% in both cases). Updates #71105 Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 Change-Id: I24258353688554042c2a836deed4830cc673e985 Reviewed-on: https://go-review.googlesource.com/c/go/+/523478 Reviewed-by: Mark Ryan <markdryan@rivosinc.com> Reviewed-by: Mark Freeman <markfreeman@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
2025-12-08 06:10:04 +00:00 · 2025-09-26 05:05:49 +10:00 · 2025-09-26 05:05:49 +10:00 · 9859b43643
commit 9859b43643
parent b9ef0633f6
9 changed files with 225 additions and 48 deletions
--- a/src/cmd/asm/internal/flags/flags.go
+++ b/src/cmd/asm/internal/flags/flags.go
@ -29,8 +29,9 @@ var (
 )
 var DebugFlags struct {
-	MayMoreStack string `help:"call named function before all stack growth checks"`
+	CompressInstructions int    `help:"use compressed instructions when possible (if supported by architecture)"`
-	PCTab        string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
+	MayMoreStack         string `help:"call named function before all stack growth checks"`
 	PCTab                string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
 }
 var (
@ -47,6 +48,8 @@ func init() {
 	flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help")
 	objabi.AddVersionFlag() // -V
 	objabi.Flagcount("S", "print assembly and machine code", &PrintOut)
 	DebugFlags.CompressInstructions = 1
 }
 // MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2.
--- a/src/cmd/asm/main.go
+++ b/src/cmd/asm/main.go
@ -40,6 +40,7 @@ func main() {
 		log.Fatalf("unrecognized architecture %s", GOARCH)
 	}
 	ctxt := obj.Linknew(architecture.LinkArch)
 	ctxt.CompressInstructions = flags.DebugFlags.CompressInstructions != 0
 	ctxt.Debugasm = flags.PrintOut
 	ctxt.Debugvlog = flags.DebugV
 	ctxt.Flag_dynlink = *flags.Dynlink
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@ -20,6 +20,7 @@ type DebugFlags struct {
 	Append                int    `help:"print information about append compilation"`
 	Checkptr              int    `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
 	Closure               int    `help:"print information about closure compilation"`
 	CompressInstructions  int    `help:"use compressed instructions when possible (if supported by architecture)"`
 	Converthash           string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"`
 	Defer                 int    `help:"print information about defer compilation"`
 	DisableNil            int    `help:"disable nil checks" concurrent:"ok"`
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@ -177,6 +177,7 @@ func ParseFlags() {
 	Flag.WB = true
 	Debug.ConcurrentOk = true
 	Debug.CompressInstructions = 1
 	Debug.MaxShapeLen = 500
 	Debug.AlignHot = 1
 	Debug.InlFuncsWithClosures = 1
@ -299,6 +300,7 @@ func ParseFlags() {
 	}
 	parseSpectre(Flag.Spectre) // left as string for RecordFlags
 	Ctxt.CompressInstructions = Debug.CompressInstructions != 0
 	Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared
 	Ctxt.Flag_optimize = Flag.N == 0
 	Ctxt.Debugasm = int(Flag.S)
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@ -1153,36 +1153,37 @@ type Func interface {
 // Link holds the context for writing object code from a compiler
 // to be linker input or for reading that input into the linker.
 type Link struct {
-	Headtype           objabi.HeadType
+	Headtype             objabi.HeadType
-	Arch               *LinkArch
+	Arch                 *LinkArch
-	Debugasm           int
+	CompressInstructions bool // use compressed instructions where possible (if supported by architecture)
-	Debugvlog          bool
+	Debugasm             int
-	Debugpcln          string
+	Debugvlog            bool
-	Flag_shared        bool
+	Debugpcln            string
-	Flag_dynlink       bool
+	Flag_shared          bool
-	Flag_linkshared    bool
+	Flag_dynlink         bool
-	Flag_optimize      bool
+	Flag_linkshared      bool
-	Flag_locationlists bool
+	Flag_optimize        bool
-	Flag_noRefName     bool   // do not include referenced symbol names in object file
+	Flag_locationlists   bool
-	Retpoline          bool   // emit use of retpoline stubs for indirect jmp/call
+	Flag_noRefName       bool   // do not include referenced symbol names in object file
-	Flag_maymorestack  string // If not "", call this function before stack checks
+	Retpoline            bool   // emit use of retpoline stubs for indirect jmp/call
-	Bso                *bufio.Writer
+	Flag_maymorestack    string // If not "", call this function before stack checks
-	Pathname           string
+	Bso                  *bufio.Writer
-	Pkgpath            string           // the current package's import path
+	Pathname             string
-	hashmu             sync.Mutex       // protects hash, funchash
+	Pkgpath              string           // the current package's import path
-	hash               map[string]*LSym // name -> sym mapping
+	hashmu               sync.Mutex       // protects hash, funchash
-	funchash           map[string]*LSym // name -> sym mapping for ABIInternal syms
+	hash                 map[string]*LSym // name -> sym mapping
-	statichash         map[string]*LSym // name -> sym mapping for static syms
+	funchash             map[string]*LSym // name -> sym mapping for ABIInternal syms
-	PosTable           src.PosTable
+	statichash           map[string]*LSym // name -> sym mapping for static syms
-	InlTree            InlTree // global inlining tree used by gc/inl.go
+	PosTable             src.PosTable
-	DwFixups           *DwarfFixupTable
+	InlTree              InlTree // global inlining tree used by gc/inl.go
-	DwTextCount        int
+	DwFixups             *DwarfFixupTable
-	Imports            []goobj.ImportedPkg
+	DwTextCount          int
-	DiagFunc           func(string, ...any)
+	Imports              []goobj.ImportedPkg
-	DiagFlush          func()
+	DiagFunc             func(string, ...any)
-	DebugInfo          func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls)
+	DiagFlush            func()
-	GenAbstractFunc    func(fn *LSym)
+	DebugInfo            func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls)
-	Errors             int
+	GenAbstractFunc      func(fn *LSym)
 	Errors               int
 	InParallel    bool // parallel backend phase in effect
 	UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges
--- a/src/cmd/internal/obj/riscv/asm_test.go
+++ b/src/cmd/internal/obj/riscv/asm_test.go
@ -11,8 +11,8 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
 	"regexp"
 	"runtime"
 	"strings"
 	"testing"
 )
@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) {
 	fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
 	fmt.Fprintln(buf, "BEQ X0, X0, label")
 	for i := 0; i < 1<<19; i++ {
-		fmt.Fprintln(buf, "ADD $0, X0, X0")
+		fmt.Fprintln(buf, "ADD $0, X5, X0")
 	}
 	fmt.Fprintln(buf, "label:")
-	fmt.Fprintln(buf, "ADD $0, X0, X0")
+	fmt.Fprintln(buf, "ADD $0, X5, X0")
 }
 // TestLargeCall generates a large function (>1MB of text) with a call to
@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) {
 	fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0")
 	fmt.Fprintln(buf, "CALL ·y(SB)")
 	for i := 0; i < 1<<19; i++ {
-		fmt.Fprintln(buf, "ADD $0, X0, X0")
+		fmt.Fprintln(buf, "ADD $0, X5, X0")
 	}
 	fmt.Fprintln(buf, "RET")
 	fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0")
-	fmt.Fprintln(buf, "ADD $0, X0, X0")
+	fmt.Fprintln(buf, "ADD $0, X5, X0")
 	fmt.Fprintln(buf, "RET")
 }
@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0
 	//	FENCE
 	//	NOP
 	//	FENCE
-	//	RET
+	//	RET	(CJALR or JALR)
-	want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00"
+	want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ")
-	if !strings.Contains(string(out), want) {
+	if !want.Match(out) {
 		t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want)
 	}
 }
--- a/src/cmd/internal/obj/riscv/cpu.go
+++ b/src/cmd/internal/obj/riscv/cpu.go
@ -326,6 +326,9 @@ const (
 	NEED_GOT_PCREL_ITYPE_RELOC
 )
 // NEED_RELOC is a mask that combines the five NEED_*_RELOC mark bits named
 // in its definition, so that a single bitwise test against a mark value can
 // tell whether any of those relocation kinds has been requested.
 const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC |
 	NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC
 // RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files
 // at https://github.com/riscv/riscv-opcodes.
 //
--- a/src/cmd/internal/obj/riscv/obj.go
+++ b/src/cmd/internal/obj/riscv/obj.go
@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool {
 // setPCs sets the Pc field in all instructions reachable from p.
 // It uses pc as the initial value and returns the next available pc.
-func setPCs(p *obj.Prog, pc int64) int64 {
+func setPCs(p *obj.Prog, pc int64, compress bool) int64 {
 	for ; p != nil; p = p.Link {
 		p.Pc = pc
-		for _, ins := range instructionsForProg(p) {
+		for _, ins := range instructionsForProg(p, compress) {
 			pc += int64(ins.length())
 		}
@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
 	for {
 		big, rescan := false, false
-		maxPC := setPCs(cursym.Func().Text, 0)
+		maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions)
 		if maxPC+maxTrampSize > (1 << 20) {
 			big = true
 		}
@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	// Validate all instructions - this provides nice error messages.
 	for p := cursym.Func().Text; p != nil; p = p.Link {
-		for _, ins := range instructionsForProg(p) {
+		for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
 			ins.validate(ctxt)
 		}
 	}
@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
 	}
 }
 // isScaledImmI reports whether imm passes the signed immFits check for
 // nbits bits and is also an exact multiple of scale. The divisibility test
 // is only evaluated once the range check has succeeded.
 func isScaledImmI(imm int64, nbits uint, scale int64) bool {
 	if err := immFits(imm, nbits, true); err != nil {
 		return false
 	}
 	return imm%scale == 0
 }
 // isScaledImmU reports whether imm passes the unsigned immFits check for
 // nbits bits and is also an exact multiple of scale. The divisibility test
 // is only evaluated once the range check has succeeded.
 func isScaledImmU(imm int64, nbits uint, scale int64) bool {
 	if fitErr := immFits(imm, nbits, false); fitErr != nil {
 		return false
 	}
 	return imm%scale == 0
 }
 func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) {
 	if err := immFits(imm, nbits, signed); err != nil {
 		ctxt.Diag("%v: %v", ins, err)
@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
 	wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
 }
 // isIntPrimeReg reports whether r is one of the integer registers X8
 // through X15 (inclusive), the range this file treats as usable in a prime
 // register field of a compressed instruction.
 func isIntPrimeReg(r uint32) bool {
 	return REG_X8 <= r && r <= REG_X15
 }
 // wantIntPrimeReg checks that r is an integer register that can be used
 // in a prime register field of a compressed instruction.
 func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
 	wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
 }
 // isFloatPrimeReg reports whether r is one of the floating-point registers
 // F8 through F15 (inclusive), the range this file treats as usable in a
 // prime register field of a compressed instruction.
 func isFloatPrimeReg(r uint32) bool {
 	return REG_F8 <= r && r <= REG_F15
 }
 // wantFloatPrimeReg checks that r is a floating-point register that can
 // be used in a prime register field of a compressed instruction.
 func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool {
 	return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
 }
 // compress rewrites ins in place to an equivalent compressed (C extension)
 // instruction when its opcode, registers and immediate satisfy the
 // constraints of a compressed encoding. If no compressed form applies,
 // ins is left untouched.
 //
 // Operand conventions relied on below:
 //   - Loads keep the base register in rs1 and the destination in rd.
 //   - Stores keep the base register in rd and the value being stored in
 //     rs1 (AFSD, for example, requires an integer rd and a float rs1).
 //   - The SP-relative forms and C.MV move the relevant register into rs2
 //     and clear rs1.
 //
 // Several compressed encodings are reserved or are HINTs when rd is X0 or
 // when the immediate is zero, which is why many cases test for those
 // values explicitly before converting.
 func (ins *instruction) compress() {
 	switch ins.as {
 	case ALW:
 		if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) {
 			ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
 			ins.as = ACLW
 		}
 	case ALD:
 		if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) {
 			ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
 			ins.as = ACLD
 		}
 	case AFLD:
 		if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) {
 			ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1
 		} else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
 			ins.as = ACFLD
 		}
 	case ASW:
 		if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) {
 			ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
 			ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1
 		}
 	case ASD:
 		if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
 			ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
 			ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1
 		}
 	case AFSD:
 		if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
 			ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1
 		} else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
 			ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1
 		}
 	case AADDI:
 		// The order of these tests matters: each later form is only
 		// considered when the earlier, more specific ones did not match.
 		if ins.rd == REG_SP && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) {
 			ins.as = ACADDI16SP
 		} else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
 			ins.as = ACADDI
 		} else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) {
 			ins.as = ACADDI4SPN
 		} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil {
 			ins.as, ins.rs1 = ACLI, obj.REG_NONE
 		} else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 {
 			ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1
 		} else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 {
 			ins.as, ins.rs1 = ACNOP, ins.rd
 		}
 	case AADDIW:
 		// C.ADDIW is only valid when rd is not X0; the rd=X0 code points
 		// are reserved, so such an ADDIW must stay uncompressed. A zero
 		// immediate is permitted for this instruction.
 		if ins.rd != REG_X0 && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
 			ins.as = ACADDIW
 		}
 	case ALUI:
 		if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
 			ins.as = ACLUI
 		}
 	case ASLLI:
 		if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 {
 			ins.as = ACSLLI
 		}
 	case ASRLI:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
 			ins.as = ACSRLI
 		}
 	case ASRAI:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
 			ins.as = ACSRAI
 		}
 	case AANDI:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
 			ins.as = ACANDI
 		}
 	case AADD:
 		if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 {
 			ins.as = ACADD
 		} else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 {
 			// Addition commutes, so swap the sources to put rd in rs1.
 			ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1
 		} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 {
 			// Adding X0 is a plain register move. Clear rs1 so this C.MV
 			// has the same operand shape as the one built from ADDI above.
 			ins.as, ins.rs1 = ACMV, obj.REG_NONE
 		}
 	case AADDW:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACADDW
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
 			ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1
 		}
 	case ASUB:
 		// Subtraction does not commute, so only the rd == rs1 form applies.
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACSUB
 		}
 	case ASUBW:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACSUBW
 		}
 	case AAND:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACAND
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
 			ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1
 		}
 	case AOR:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACOR
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
 			ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1
 		}
 	case AXOR:
 		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
 			ins.as = ACXOR
 		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
 			ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1
 		}
 	case AEBREAK:
 		ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE
 	}
 }
 // instructionForProg returns the default *obj.Prog to instruction mapping.
 func instructionForProg(p *obj.Prog) *instruction {
 	ins := &instruction{
@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction {
 }
 // instructionsForProg returns the machine instructions for an *obj.Prog.
-func instructionsForProg(p *obj.Prog) []*instruction {
+func instructionsForProg(p *obj.Prog, compress bool) []*instruction {
 	ins := instructionForProg(p)
 	inss := []*instruction{ins}
@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction {
 		ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0
 	}
 	// Only compress instructions when there is no relocation, since
 	// relocation relies on knowledge about the exact instructions that
 	// are in use.
 	if compress && p.Mark&NEED_RELOC == 0 {
 		for _, ins := range inss {
 			ins.compress()
 		}
 	}
 	for _, ins := range inss {
 		ins.p = p
 	}
@ -4814,7 +4980,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 		}
 		offset := p.Pc
-		for _, ins := range instructionsForProg(p) {
+		for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
 			if ic, err := ins.encode(); err == nil {
 				cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
 				offset += int64(ins.length())
--- a/src/cmd/link/internal/ld/ld_test.go
+++ b/src/cmd/link/internal/ld/ld_test.go
@ -387,7 +387,7 @@ func TestRISCVTrampolines(t *testing.T) {
 	buf := new(bytes.Buffer)
 	fmt.Fprintf(buf, "TEXT a(SB),$0-0\n")
 	for i := 0; i < 1<<17; i++ {
-		fmt.Fprintf(buf, "\tADD $0, X0, X0\n")
+		fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
 	}
 	fmt.Fprintf(buf, "\tCALL b(SB)\n")
 	fmt.Fprintf(buf, "\tRET\n")
@ -398,7 +398,7 @@ func TestRISCVTrampolines(t *testing.T) {
 	fmt.Fprintf(buf, "\tRET\n")
 	fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n")
 	for i := 0; i < 1<<17; i++ {
-		fmt.Fprintf(buf, "\tADD $0, X0, X0\n")
+		fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
 	}
 	fmt.Fprintf(buf, "\tCALL a(SB)\n")
 	fmt.Fprintf(buf, "\tCALL c(SB)\n")