cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64

Make use of compressed instructions on riscv64 - add a compress
pass to the end of the assembler, which replaces non-compressed
instructions with compressed alternatives if possible.

Provide a `compressinstructions` compiler and assembler debug
flag, such that the compression pass can be disabled via
`-asmflags=all=-d=compressinstructions=0` and
`-gcflags=all=-d=compressinstructions=0`. Note that this does
not prevent the explicit use of compressed instructions via
assembly.

Note that this does not make use of compressed control transfer
instructions - this will be implemented in later changes.

Reduces the text size of a hello world binary by ~121KB
and reduces the text size of the go binary on riscv64 by ~1.21MB
(between 8-10% in both cases).

Updates #71105

Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64
Change-Id: I24258353688554042c2a836deed4830cc673e985
Reviewed-on: https://go-review.googlesource.com/c/go/+/523478
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Joel Sing 2025-09-26 05:05:49 +10:00
parent b9ef0633f6
commit 9859b43643
9 changed files with 225 additions and 48 deletions

View file

@ -29,8 +29,9 @@ var (
) )
var DebugFlags struct { var DebugFlags struct {
MayMoreStack string `help:"call named function before all stack growth checks"` CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"`
PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"` MayMoreStack string `help:"call named function before all stack growth checks"`
PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
} }
var ( var (
@ -47,6 +48,8 @@ func init() {
flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help") flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help")
objabi.AddVersionFlag() // -V objabi.AddVersionFlag() // -V
objabi.Flagcount("S", "print assembly and machine code", &PrintOut) objabi.Flagcount("S", "print assembly and machine code", &PrintOut)
DebugFlags.CompressInstructions = 1
} }
// MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2. // MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2.

View file

@ -40,6 +40,7 @@ func main() {
log.Fatalf("unrecognized architecture %s", GOARCH) log.Fatalf("unrecognized architecture %s", GOARCH)
} }
ctxt := obj.Linknew(architecture.LinkArch) ctxt := obj.Linknew(architecture.LinkArch)
ctxt.CompressInstructions = flags.DebugFlags.CompressInstructions != 0
ctxt.Debugasm = flags.PrintOut ctxt.Debugasm = flags.PrintOut
ctxt.Debugvlog = flags.DebugV ctxt.Debugvlog = flags.DebugV
ctxt.Flag_dynlink = *flags.Dynlink ctxt.Flag_dynlink = *flags.Dynlink

View file

@ -20,6 +20,7 @@ type DebugFlags struct {
Append int `help:"print information about append compilation"` Append int `help:"print information about append compilation"`
Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"` Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
Closure int `help:"print information about closure compilation"` Closure int `help:"print information about closure compilation"`
CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"`
Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"` Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"`
Defer int `help:"print information about defer compilation"` Defer int `help:"print information about defer compilation"`
DisableNil int `help:"disable nil checks" concurrent:"ok"` DisableNil int `help:"disable nil checks" concurrent:"ok"`

View file

@ -177,6 +177,7 @@ func ParseFlags() {
Flag.WB = true Flag.WB = true
Debug.ConcurrentOk = true Debug.ConcurrentOk = true
Debug.CompressInstructions = 1
Debug.MaxShapeLen = 500 Debug.MaxShapeLen = 500
Debug.AlignHot = 1 Debug.AlignHot = 1
Debug.InlFuncsWithClosures = 1 Debug.InlFuncsWithClosures = 1
@ -299,6 +300,7 @@ func ParseFlags() {
} }
parseSpectre(Flag.Spectre) // left as string for RecordFlags parseSpectre(Flag.Spectre) // left as string for RecordFlags
Ctxt.CompressInstructions = Debug.CompressInstructions != 0
Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared
Ctxt.Flag_optimize = Flag.N == 0 Ctxt.Flag_optimize = Flag.N == 0
Ctxt.Debugasm = int(Flag.S) Ctxt.Debugasm = int(Flag.S)

View file

@ -1153,36 +1153,37 @@ type Func interface {
// Link holds the context for writing object code from a compiler // Link holds the context for writing object code from a compiler
// to be linker input or for reading that input into the linker. // to be linker input or for reading that input into the linker.
type Link struct { type Link struct {
Headtype objabi.HeadType Headtype objabi.HeadType
Arch *LinkArch Arch *LinkArch
Debugasm int CompressInstructions bool // use compressed instructions where possible (if supported by architecture)
Debugvlog bool Debugasm int
Debugpcln string Debugvlog bool
Flag_shared bool Debugpcln string
Flag_dynlink bool Flag_shared bool
Flag_linkshared bool Flag_dynlink bool
Flag_optimize bool Flag_linkshared bool
Flag_locationlists bool Flag_optimize bool
Flag_noRefName bool // do not include referenced symbol names in object file Flag_locationlists bool
Retpoline bool // emit use of retpoline stubs for indirect jmp/call Flag_noRefName bool // do not include referenced symbol names in object file
Flag_maymorestack string // If not "", call this function before stack checks Retpoline bool // emit use of retpoline stubs for indirect jmp/call
Bso *bufio.Writer Flag_maymorestack string // If not "", call this function before stack checks
Pathname string Bso *bufio.Writer
Pkgpath string // the current package's import path Pathname string
hashmu sync.Mutex // protects hash, funchash Pkgpath string // the current package's import path
hash map[string]*LSym // name -> sym mapping hashmu sync.Mutex // protects hash, funchash
funchash map[string]*LSym // name -> sym mapping for ABIInternal syms hash map[string]*LSym // name -> sym mapping
statichash map[string]*LSym // name -> sym mapping for static syms funchash map[string]*LSym // name -> sym mapping for ABIInternal syms
PosTable src.PosTable statichash map[string]*LSym // name -> sym mapping for static syms
InlTree InlTree // global inlining tree used by gc/inl.go PosTable src.PosTable
DwFixups *DwarfFixupTable InlTree InlTree // global inlining tree used by gc/inl.go
DwTextCount int DwFixups *DwarfFixupTable
Imports []goobj.ImportedPkg DwTextCount int
DiagFunc func(string, ...any) Imports []goobj.ImportedPkg
DiagFlush func() DiagFunc func(string, ...any)
DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls) DiagFlush func()
GenAbstractFunc func(fn *LSym) DebugInfo func(ctxt *Link, fn *LSym, info *LSym, curfn Func) ([]dwarf.Scope, dwarf.InlCalls)
Errors int GenAbstractFunc func(fn *LSym)
Errors int
InParallel bool // parallel backend phase in effect InParallel bool // parallel backend phase in effect
UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges UseBASEntries bool // use Base Address Selection Entries in location lists and PC ranges

View file

@ -11,8 +11,8 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"regexp"
"runtime" "runtime"
"strings"
"testing" "testing"
) )
@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT f(SB),0,$0-0") fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
fmt.Fprintln(buf, "BEQ X0, X0, label") fmt.Fprintln(buf, "BEQ X0, X0, label")
for i := 0; i < 1<<19; i++ { for i := 0; i < 1<<19; i++ {
fmt.Fprintln(buf, "ADD $0, X0, X0") fmt.Fprintln(buf, "ADD $0, X5, X0")
} }
fmt.Fprintln(buf, "label:") fmt.Fprintln(buf, "label:")
fmt.Fprintln(buf, "ADD $0, X0, X0") fmt.Fprintln(buf, "ADD $0, X5, X0")
} }
// TestLargeCall generates a large function (>1MB of text) with a call to // TestLargeCall generates a large function (>1MB of text) with a call to
@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0") fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0")
fmt.Fprintln(buf, "CALL ·y(SB)") fmt.Fprintln(buf, "CALL ·y(SB)")
for i := 0; i < 1<<19; i++ { for i := 0; i < 1<<19; i++ {
fmt.Fprintln(buf, "ADD $0, X0, X0") fmt.Fprintln(buf, "ADD $0, X5, X0")
} }
fmt.Fprintln(buf, "RET") fmt.Fprintln(buf, "RET")
fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0") fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0")
fmt.Fprintln(buf, "ADD $0, X0, X0") fmt.Fprintln(buf, "ADD $0, X5, X0")
fmt.Fprintln(buf, "RET") fmt.Fprintln(buf, "RET")
} }
@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0
// FENCE // FENCE
// NOP // NOP
// FENCE // FENCE
// RET // RET (CJALR or JALR)
want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00" want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ")
if !strings.Contains(string(out), want) { if !want.Match(out) {
t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want) t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want)
} }
} }

View file

@ -326,6 +326,9 @@ const (
NEED_GOT_PCREL_ITYPE_RELOC NEED_GOT_PCREL_ITYPE_RELOC
) )
// NEED_RELOC is the bitwise OR of every NEED_*_RELOC mark listed below. It
// lets a caller test a Prog's Mark field for any of these relocation marks
// with a single mask operation.
const NEED_RELOC = NEED_JAL_RELOC |
	NEED_CALL_RELOC |
	NEED_PCREL_ITYPE_RELOC |
	NEED_PCREL_STYPE_RELOC |
	NEED_GOT_PCREL_ITYPE_RELOC
// RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files // RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files
// at https://github.com/riscv/riscv-opcodes. // at https://github.com/riscv/riscv-opcodes.
// //

View file

@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool {
// setPCs sets the Pc field in all instructions reachable from p. // setPCs sets the Pc field in all instructions reachable from p.
// It uses pc as the initial value and returns the next available pc. // It uses pc as the initial value and returns the next available pc.
func setPCs(p *obj.Prog, pc int64) int64 { func setPCs(p *obj.Prog, pc int64, compress bool) int64 {
for ; p != nil; p = p.Link { for ; p != nil; p = p.Link {
p.Pc = pc p.Pc = pc
for _, ins := range instructionsForProg(p) { for _, ins := range instructionsForProg(p, compress) {
pc += int64(ins.length()) pc += int64(ins.length())
} }
@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// a fixed point will be reached). No attempt to handle functions > 2GiB. // a fixed point will be reached). No attempt to handle functions > 2GiB.
for { for {
big, rescan := false, false big, rescan := false, false
maxPC := setPCs(cursym.Func().Text, 0) maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions)
if maxPC+maxTrampSize > (1 << 20) { if maxPC+maxTrampSize > (1 << 20) {
big = true big = true
} }
@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// Validate all instructions - this provides nice error messages. // Validate all instructions - this provides nice error messages.
for p := cursym.Func().Text; p != nil; p = p.Link { for p := cursym.Func().Text; p != nil; p = p.Link {
for _, ins := range instructionsForProg(p) { for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
ins.validate(ctxt) ins.validate(ctxt)
} }
} }
@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
} }
} }
// isScaledImmI reports whether imm passes the signed nbits-wide immFits
// check and is also an exact multiple of scale.
func isScaledImmI(imm int64, nbits uint, scale int64) bool {
	if err := immFits(imm, nbits, true); err != nil {
		return false
	}
	return imm%scale == 0
}
// isScaledImmU reports whether imm passes the unsigned nbits-wide immFits
// check and is also an exact multiple of scale.
func isScaledImmU(imm int64, nbits uint, scale int64) bool {
	if err := immFits(imm, nbits, false); err != nil {
		return false
	}
	return imm%scale == 0
}
func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) { func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) {
if err := immFits(imm, nbits, signed); err != nil { if err := immFits(imm, nbits, signed); err != nil {
ctxt.Diag("%v: %v", ins, err) ctxt.Diag("%v: %v", ins, err)
@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31) wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
} }
// isIntPrimeReg reports whether r lies in the inclusive range REG_X8
// through REG_X15.
func isIntPrimeReg(r uint32) bool {
	if r < REG_X8 {
		return false
	}
	return r <= REG_X15
}
// wantIntPrimeReg checks that r is an integer register that can be used // wantIntPrimeReg checks that r is an integer register that can be used
// in a prime register field of a compressed instruction. // in a prime register field of a compressed instruction.
func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31) wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
} }
// isFloatPrimeReg reports whether r lies in the inclusive range REG_F8
// through REG_F15.
func isFloatPrimeReg(r uint32) bool {
	if r < REG_F8 {
		return false
	}
	return r <= REG_F15
}
// wantFloatPrimeReg checks that r is a floating-point register that can // wantFloatPrimeReg checks that r is a floating-point register that can
// be used in a prime register field of a compressed instruction. // be used in a prime register field of a compressed instruction.
func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) { func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool {
return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
} }
// compress rewrites ins in place to use a compressed opcode (one of the AC*
// mnemonics) when the instruction's registers and immediate satisfy the
// conditions checked for that compressed form. When no condition matches,
// ins is left untouched.
//
// Only the opcodes listed in the switch are considered; no control transfer
// instruction is handled here.
//
// NOTE(review): for the store cases the SP-relative forms test ins.rd against
// REG_SP, so the base register appears to live in rd and the stored value in
// rs1 - confirm against the store encoders.
func (ins *instruction) compress() {
	switch ins.as {
	case ALW:
		if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) {
			ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
			ins.as = ACLW
		}

	case ALD:
		if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) {
			ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
			ins.as = ACLD
		}

	case AFLD:
		if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) {
			ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1
		} else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
			ins.as = ACFLD
		}

	case ASW:
		if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) {
			ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
			ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1
		}

	case ASD:
		if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
			ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
			ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1
		}

	case AFSD:
		if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
			ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1
		} else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
			ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1
		}

	case AADDI:
		// The branches are tried in order, so an ADDI that satisfies more
		// than one compressed form takes the first one listed.
		if ins.rd == REG_SP && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) {
			ins.as = ACADDI16SP
		} else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
			ins.as = ACADDI
		} else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) {
			ins.as = ACADDI4SPN
		} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil {
			ins.as, ins.rs1 = ACLI, obj.REG_NONE
		} else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 {
			ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1
		} else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 {
			ins.as, ins.rs1 = ACNOP, ins.rd
		}

	case AADDIW:
		// C.ADDIW with rd=x0 is a reserved encoding, so an ADDIW that
		// targets X0 must stay uncompressed.
		if ins.rd != REG_X0 && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
			ins.as = ACADDIW
		}

	case ALUI:
		if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
			ins.as = ACLUI
		}

	case ASLLI:
		if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 {
			ins.as = ACSLLI
		}

	case ASRLI:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
			ins.as = ACSRLI
		}

	case ASRAI:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
			ins.as = ACSRAI
		}

	case AANDI:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
			ins.as = ACANDI
		}

	case AADD:
		if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 {
			ins.as = ACADD
		} else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 {
			// Addition commutes: swap the sources so that rd == rs1.
			ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1
		} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 {
			ins.as = ACMV
		}

	case AADDW:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACADDW
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
			// Commutative: swap the sources so that rd == rs1.
			ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1
		}

	case ASUB:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACSUB
		}

	case ASUBW:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACSUBW
		}

	case AAND:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACAND
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
			// Commutative: swap the sources so that rd == rs1.
			ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1
		}

	case AOR:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACOR
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
			// Commutative: swap the sources so that rd == rs1.
			ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1
		}

	case AXOR:
		if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
			ins.as = ACXOR
		} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
			// Commutative: swap the sources so that rd == rs1.
			ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1
		}

	case AEBREAK:
		ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE
	}
}
// instructionForProg returns the default *obj.Prog to instruction mapping. // instructionForProg returns the default *obj.Prog to instruction mapping.
func instructionForProg(p *obj.Prog) *instruction { func instructionForProg(p *obj.Prog) *instruction {
ins := &instruction{ ins := &instruction{
@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction {
} }
// instructionsForProg returns the machine instructions for an *obj.Prog. // instructionsForProg returns the machine instructions for an *obj.Prog.
func instructionsForProg(p *obj.Prog) []*instruction { func instructionsForProg(p *obj.Prog, compress bool) []*instruction {
ins := instructionForProg(p) ins := instructionForProg(p)
inss := []*instruction{ins} inss := []*instruction{ins}
@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction {
ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0 ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0
} }
// Only compress instructions when there is no relocation, since
// relocation relies on knowledge about the exact instructions that
// are in use.
if compress && p.Mark&NEED_RELOC == 0 {
for _, ins := range inss {
ins.compress()
}
}
for _, ins := range inss { for _, ins := range inss {
ins.p = p ins.p = p
} }
@ -4814,7 +4980,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
} }
offset := p.Pc offset := p.Pc
for _, ins := range instructionsForProg(p) { for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
if ic, err := ins.encode(); err == nil { if ic, err := ins.encode(); err == nil {
cursym.WriteInt(ctxt, offset, ins.length(), int64(ic)) cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
offset += int64(ins.length()) offset += int64(ins.length())

View file

@ -387,7 +387,7 @@ func TestRISCVTrampolines(t *testing.T) {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
fmt.Fprintf(buf, "TEXT a(SB),$0-0\n") fmt.Fprintf(buf, "TEXT a(SB),$0-0\n")
for i := 0; i < 1<<17; i++ { for i := 0; i < 1<<17; i++ {
fmt.Fprintf(buf, "\tADD $0, X0, X0\n") fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
} }
fmt.Fprintf(buf, "\tCALL b(SB)\n") fmt.Fprintf(buf, "\tCALL b(SB)\n")
fmt.Fprintf(buf, "\tRET\n") fmt.Fprintf(buf, "\tRET\n")
@ -398,7 +398,7 @@ func TestRISCVTrampolines(t *testing.T) {
fmt.Fprintf(buf, "\tRET\n") fmt.Fprintf(buf, "\tRET\n")
fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n") fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n")
for i := 0; i < 1<<17; i++ { for i := 0; i < 1<<17; i++ {
fmt.Fprintf(buf, "\tADD $0, X0, X0\n") fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
} }
fmt.Fprintf(buf, "\tCALL a(SB)\n") fmt.Fprintf(buf, "\tCALL a(SB)\n")
fmt.Fprintf(buf, "\tCALL c(SB)\n") fmt.Fprintf(buf, "\tCALL c(SB)\n")