cmd/objdump: move armasm, x86asm into internal packages

For Go 1.3 these external packages were collapsed into large single-file implementations stored in the cmd/objdump directory. For Go 1.4 we want pprof to be able to link against them too, so move them into cmd/internal, where they can be shared. The new files are copied from the repo in the file path (rsc.io/...). Those repos were code reviewed during development (mainly by crawshaw and minux), because we knew the main repo would use them. Update #8798 LGTM=bradfitz R=crawshaw, bradfitz CC=golang-codereviews https://golang.org/cl/153750044
2025-12-08 06:10:04 +00:00 · 2014-09-30 12:28:24 -04:00 · 2014-09-30 12:28:24 -04:00 · c75f81f0ed
commit c75f81f0ed
parent 7de0c315f6
33 changed files with 35092 additions and 24625 deletions
--- a/src/cmd/internal/rsc.io/arm/armasm/Makefile
+++ b/src/cmd/internal/rsc.io/arm/armasm/Makefile
@ -0,0 +1,2 @@
+# Regenerate tables.go by running ../armmap/map.go in decoder-output mode
+# over ../arm.csv; the output is gofmt'ed via a temporary _tables.go.
+tables.go: ../armmap/map.go ../arm.csv 
+	go run ../armmap/map.go -fmt=decoder ../arm.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go
--- a/src/cmd/internal/rsc.io/arm/armasm/decode.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/decode.go
@ -0,0 +1,567 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"encoding/binary"
+	"fmt"
+)
+
+// An instFormat describes the format of an instruction encoding.
+// An instruction with 32-bit value x matches the format if x&mask == value
+// and the condition matches.
+// The condition matches if x>>28 == 0xF && value>>28==0xF
+// or if x>>28 != 0xF and value>>28 == 0.
+// If x matches the format, then the rest of the fields describe how to interpret x.
+// The opBits describe bits that should be extracted from x and added to the opcode.
+// As read by Decode, opBits is a sequence of 16-bit groups, consumed starting
+// with the least significant group. In each group the low 8 bits give a field
+// width and the high 8 bits give the field's bit offset in x.
+// For example opBits = 0x1401_1c04 means that the value
+//	(4 bits at offset 28) in the low bits, with (1 bit at offset 20) above them,
+// should be added to op.
+// Finally the args describe how to decode the instruction arguments.
+// args is stored as a fixed-size array; if there are fewer than len(args) arguments,
+// args[i] == 0 marks the end of the argument list.
+type instFormat struct {
+	// mask selects the bits of x that must equal value.
+	mask     uint32
+	value    uint32
+	// priority ranks overlapping formats: Decode keeps the matching format
+	// with the greatest priority, and never selects a priority <= 0.
+	priority int8
+	// op is the base opcode, before the opBits-selected bits are added.
+	op       Op
+	opBits   uint64
+	args     instArgs
+}
+
+// instArgs is the fixed-size argument description list of an instFormat.
+// Unused trailing slots hold the zero instArg.
+type instArgs [4]instArg
+
+// Errors returned by Decode.
+var (
+	errMode    = fmt.Errorf("unsupported execution mode")
+	errShort   = fmt.Errorf("truncated instruction")
+	errUnknown = fmt.Errorf("unknown instruction")
+)
+
+// decoderCover has one entry per instFormats element; Decode allocates it
+// on first use and sets entry i once format i has decoded some instruction.
+// It is package-level state written by Decode without synchronization.
+var decoderCover []bool
+
+// Decode decodes the leading bytes in src as a single instruction.
+// Only ModeARM is supported (errMode otherwise) and src must hold at least
+// four bytes (errShort otherwise). The instruction word is read little-endian.
+// If no entry of instFormats decodes the word, Decode returns errUnknown.
+func Decode(src []byte, mode Mode) (inst Inst, err error) {
+	switch {
+	case mode != ModeARM:
+		return Inst{}, errMode
+	case len(src) < 4:
+		return Inst{}, errShort
+	}
+
+	if decoderCover == nil {
+		decoderCover = make([]bool, len(instFormats))
+	}
+
+	x := binary.LittleEndian.Uint32(src)
+
+	// instFormats mixes conditional entries (top nibble of value is 0) with
+	// unconditional ones (top nibble of value is f). Build a lookup key in
+	// which a real condition code is cleared to 0, so that the top nibble
+	// can always be included in the comparison mask.
+	const condMask = 0xf0000000
+	key := x
+	if x&condMask != condMask {
+		key = x &^ condMask
+	}
+
+	// best is the priority of the format currently stored in inst;
+	// only strictly higher priorities may replace it.
+	var best int8
+	for i := range instFormats {
+		f := &instFormats[i]
+		if f.priority <= best || key&(f.mask|condMask) != f.value {
+			continue
+		}
+
+		// Gather the opBits-selected fields of x, packing them from the
+		// low bits upward, and add the result to the base opcode.
+		var delta uint32
+		shift := uint(0)
+		for bits := f.opBits; bits != 0; bits >>= 16 {
+			width := uint(bits & 0xFF)
+			off := uint(bits >> 8 & 0xFF)
+			field := x >> off & (1<<width - 1)
+			delta |= field << shift
+			shift += width
+		}
+		op := f.op + Op(delta)
+
+		// Special case: BKPT encodes with condition but cannot have one.
+		if op&^15 == BKPT_EQ && op != BKPT {
+			continue
+		}
+
+		var args Args
+		decoded := true
+		for j, aop := range f.args {
+			if aop == 0 {
+				break
+			}
+			arg := decodeArg(aop, x)
+			if arg == nil { // cannot decode argument
+				decoded = false
+				break
+			}
+			args[j] = arg
+		}
+		if !decoded {
+			continue
+		}
+
+		decoderCover[i] = true
+		inst = Inst{Op: op, Args: args, Enc: x, Len: 4}
+		best = f.priority
+	}
+
+	if inst.Op == 0 {
+		return Inst{}, errUnknown
+	}
+	return inst, nil
+}
+
+// An instArg describes the encoding of a single argument.
+// In the names used for arguments, _p_ means +, _m_ means -,
+// _pm_ means ± (usually keyed by the U bit).
+// The _W suffix indicates a general addressing mode based on the P and W bits.
+// The _offset and _postindex suffixes force the given addressing mode.
+// The rest should be somewhat self-explanatory, at least given
+// the decodeArg function.
+type instArg uint8
+
+// The zero instArg is deliberately left unnamed: Decode treats it as the
+// end-of-list marker in an instArgs array.
+const (
+	_ instArg = iota
+	arg_APSR
+	arg_FPSCR
+	arg_Dn_half
+	arg_R1_0
+	arg_R1_12
+	arg_R2_0
+	arg_R2_12
+	arg_R_0
+	arg_R_12
+	arg_R_12_nzcv
+	arg_R_16
+	arg_R_16_WB
+	arg_R_8
+	arg_R_rotate
+	arg_R_shift_R
+	arg_R_shift_imm
+	arg_SP
+	arg_Sd
+	arg_Sd_Dd
+	arg_Dd_Sd
+	arg_Sm
+	arg_Sm_Dm
+	arg_Sn
+	arg_Sn_Dn
+	arg_const
+	arg_endian
+	arg_fbits
+	arg_fp_0
+	arg_imm24
+	arg_imm5
+	arg_imm5_32
+	arg_imm5_nz
+	arg_imm_12at8_4at0
+	arg_imm_4at16_12at0
+	arg_imm_vfp
+	arg_label24
+	arg_label24H
+	arg_label_m_12
+	arg_label_p_12
+	arg_label_pm_12
+	arg_label_pm_4_4
+	arg_lsb_width
+	arg_mem_R
+	arg_mem_R_pm_R_W
+	arg_mem_R_pm_R_postindex
+	arg_mem_R_pm_R_shift_imm_W
+	arg_mem_R_pm_R_shift_imm_offset
+	arg_mem_R_pm_R_shift_imm_postindex
+	arg_mem_R_pm_imm12_W
+	arg_mem_R_pm_imm12_offset
+	arg_mem_R_pm_imm12_postindex
+	arg_mem_R_pm_imm8_W
+	arg_mem_R_pm_imm8_postindex
+	arg_mem_R_pm_imm8at0_offset
+	arg_option
+	arg_registers
+	arg_registers1
+	arg_registers2
+	arg_satimm4
+	arg_satimm5
+	arg_satimm4m1
+	arg_satimm5m1
+	arg_widthm1
+)
+
+// decodeArg decodes the arg described by aop from the instruction bits x.
+// It returns nil if x cannot be decoded according to aop.
+// An aop with no case below (including the zero instArg) also yields nil.
+// Several cases are implemented by rewriting bits of x and re-dispatching
+// to a more general case.
+func decodeArg(aop instArg, x uint32) Arg {
+	switch aop {
+	default:
+		return nil
+
+	case arg_APSR:
+		return APSR
+	case arg_FPSCR:
+		return FPSCR
+
+	// Plain 4-bit register fields; the suffix is the bit offset in x.
+	case arg_R_0:
+		return Reg(x & (1<<4 - 1))
+	case arg_R_8:
+		return Reg((x >> 8) & (1<<4 - 1))
+	case arg_R_12:
+		return Reg((x >> 12) & (1<<4 - 1))
+	case arg_R_16:
+		return Reg((x >> 16) & (1<<4 - 1))
+
+	case arg_R_12_nzcv:
+		// Register number 15 in this position denotes APSR_nzcv instead.
+		r := Reg((x >> 12) & (1<<4 - 1))
+		if r == R15 {
+			return APSR_nzcv
+		}
+		return r
+
+	case arg_R_16_WB:
+		// Bit 21 selects the writeback form of the base register.
+		mode := AddrLDM
+		if (x>>21)&1 != 0 {
+			mode = AddrLDM_WB
+		}
+		return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode}
+
+	case arg_R_rotate:
+		Rm := Reg(x & (1<<4 - 1))
+		typ, count := decodeShift(x)
+		// ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1.
+		if typ == RotateRightExt {
+			return Reg(Rm)
+		}
+		return RegShift{Rm, typ, uint8(count)}
+
+	case arg_R_shift_R:
+		Rm := Reg(x & (1<<4 - 1))
+		Rs := Reg((x >> 8) & (1<<4 - 1))
+		typ := Shift((x >> 5) & (1<<2 - 1))
+		return RegShiftReg{Rm, typ, Rs}
+
+	case arg_R_shift_imm:
+		Rm := Reg(x & (1<<4 - 1))
+		typ, count := decodeShift(x)
+		// A left shift by zero is no shift at all: return the bare register.
+		if typ == ShiftLeft && count == 0 {
+			return Reg(Rm)
+		}
+		return RegShift{Rm, typ, uint8(count)}
+
+	// R1_* returns the encoded register unchanged; R2_* returns it with
+	// the low bit of the register number forced to 1.
+	case arg_R1_0:
+		return Reg((x & (1<<4 - 1)))
+	case arg_R1_12:
+		return Reg(((x >> 12) & (1<<4 - 1)))
+	case arg_R2_0:
+		return Reg((x & (1<<4 - 1)) | 1)
+	case arg_R2_12:
+		return Reg(((x >> 12) & (1<<4 - 1)) | 1)
+
+	case arg_SP:
+		return SP
+
+	case arg_Sd_Dd:
+		// Bit 8 (sz) chooses between a D register (vx:v) and an S register (v:vx).
+		v := (x >> 12) & (1<<4 - 1)
+		vx := (x >> 22) & 1
+		sz := (x >> 8) & 1
+		if sz != 0 {
+			return D0 + Reg(vx<<4+v)
+		} else {
+			return S0 + Reg(v<<1+vx)
+		}
+
+	case arg_Dd_Sd:
+		// Same fields as arg_Sd_Dd with the sz bit (bit 8) inverted.
+		return decodeArg(arg_Sd_Dd, x^(1<<8))
+
+	case arg_Sd:
+		v := (x >> 12) & (1<<4 - 1)
+		vx := (x >> 22) & 1
+		return S0 + Reg(v<<1+vx)
+
+	case arg_Sm_Dm:
+		v := (x >> 0) & (1<<4 - 1)
+		vx := (x >> 5) & 1
+		sz := (x >> 8) & 1
+		if sz != 0 {
+			return D0 + Reg(vx<<4+v)
+		} else {
+			return S0 + Reg(v<<1+vx)
+		}
+
+	case arg_Sm:
+		v := (x >> 0) & (1<<4 - 1)
+		vx := (x >> 5) & 1
+		return S0 + Reg(v<<1+vx)
+
+	case arg_Dn_half:
+		// D register plus an index taken from bit 21.
+		v := (x >> 16) & (1<<4 - 1)
+		vx := (x >> 7) & 1
+		return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)}
+
+	case arg_Sn_Dn:
+		v := (x >> 16) & (1<<4 - 1)
+		vx := (x >> 7) & 1
+		sz := (x >> 8) & 1
+		if sz != 0 {
+			return D0 + Reg(vx<<4+v)
+		} else {
+			return S0 + Reg(v<<1+vx)
+		}
+
+	case arg_Sn:
+		v := (x >> 16) & (1<<4 - 1)
+		vx := (x >> 7) & 1
+		return S0 + Reg(v<<1+vx)
+
+	case arg_const:
+		// 8-bit value v rotated right by rot, where rot is twice the
+		// 4-bit field at bit 8. Encodings for which a different rotation
+		// would also work are returned as ImmAlt, keeping v and rot separate.
+		v := x & (1<<8 - 1)
+		rot := (x >> 8) & (1<<4 - 1) * 2
+		if rot > 0 && v&3 == 0 {
+			// could rotate less
+			return ImmAlt{uint8(v), uint8(rot)}
+		}
+		if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v {
+			// could wrap around to rot==0.
+			return ImmAlt{uint8(v), uint8(rot)}
+		}
+		return Imm(v>>rot | v<<(32-rot))
+
+	case arg_endian:
+		return Endian((x >> 9) & 1)
+
+	case arg_fbits:
+		// 16 or 32 (selected by bit 7) minus the 5-bit value imm4:i.
+		return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1))
+
+	case arg_fp_0:
+		return Imm(0)
+
+	case arg_imm24:
+		return Imm(x & (1<<24 - 1))
+
+	case arg_imm5:
+		return Imm((x >> 7) & (1<<5 - 1))
+
+	case arg_imm5_32:
+		// An encoded 0 stands for 32.
+		x = (x >> 7) & (1<<5 - 1)
+		if x == 0 {
+			x = 32
+		}
+		return Imm(x)
+
+	case arg_imm5_nz:
+		// An encoded 0 is not decodable with this argument kind.
+		x = (x >> 7) & (1<<5 - 1)
+		if x == 0 {
+			return nil
+		}
+		return Imm(x)
+
+	case arg_imm_4at16_12at0:
+		return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1))
+
+	case arg_imm_12at8_4at0:
+		return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1))
+
+	case arg_imm_vfp:
+		x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1)
+		return Imm(x)
+
+	case arg_label24:
+		// imm24<<2, sign-extended from 26 bits.
+		imm := (x & (1<<24 - 1)) << 2
+		return PCRel(int32(imm<<6) >> 6)
+
+	case arg_label24H:
+		// Like arg_label24, with bit 24 (H) supplying bit 1 of the offset.
+		h := (x >> 24) & 1
+		imm := (x&(1<<24-1))<<2 | h<<1
+		return PCRel(int32(imm<<6) >> 6)
+
+	case arg_label_m_12:
+		d := int32(x & (1<<12 - 1))
+		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)}
+
+	case arg_label_p_12:
+		d := int32(x & (1<<12 - 1))
+		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
+
+	case arg_label_pm_12:
+		// Bit 23 (U) clear means the offset is subtracted.
+		d := int32(x & (1<<12 - 1))
+		u := (x >> 23) & 1
+		if u == 0 {
+			d = -d
+		}
+		return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
+
+	case arg_label_pm_4_4:
+		d := int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
+		u := (x >> 23) & 1
+		if u == 0 {
+			d = -d
+		}
+		return PCRel(d)
+
+	case arg_lsb_width:
+		// Width is msb+1-lsb; msb below lsb is not decodable.
+		lsb := (x >> 7) & (1<<5 - 1)
+		msb := (x >> 16) & (1<<5 - 1)
+		if msb < lsb || msb >= 32 {
+			return nil
+		}
+		return Imm(msb + 1 - lsb)
+
+	case arg_mem_R:
+		Rn := Reg((x >> 16) & (1<<4 - 1))
+		return Mem{Base: Rn, Mode: AddrOffset}
+
+	case arg_mem_R_pm_R_postindex:
+		// Treat [<Rn>],+/-<Rm> like [<Rn>,+/-<Rm>{,<shift>}]{!}
+		// by forcing shift bits to <<0 and P=0, W=0 (postindex=true).
+		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21))
+
+	case arg_mem_R_pm_R_W:
+		// Treat [<Rn>,+/-<Rm>]{!} like [<Rn>,+/-<Rm>{,<shift>}]{!}
+		// by forcing shift bits to <<0.
+		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5))
+
+	case arg_mem_R_pm_R_shift_imm_offset:
+		// Treat [<Rn>,+/-<Rm>{,<shift>}] like [<Rn>,+/-<Rm>{,<shift>}]{!}
+		// by forcing P=1, W=0 (index=false, wback=false).
+		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24)
+
+	case arg_mem_R_pm_R_shift_imm_postindex:
+		// Treat [<Rn>],+/-<Rm>{,<shift>} like [<Rn>,+/-<Rm>{,<shift>}]{!}
+		// by forcing P=0, W=0 (postindex=true).
+		return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21))
+
+	case arg_mem_R_pm_R_shift_imm_W:
+		Rn := Reg((x >> 16) & (1<<4 - 1))
+		Rm := Reg(x & (1<<4 - 1))
+		typ, count := decodeShift(x)
+		u := (x >> 23) & 1
+		w := (x >> 21) & 1
+		p := (x >> 24) & 1
+		// P=0 with W=1 is rejected as undecodable.
+		if p == 0 && w == 1 {
+			return nil
+		}
+		sign := int8(+1)
+		if u == 0 {
+			sign = -1
+		}
+		// The AddrMode value is P in bit 1 and the inverse of W in bit 0.
+		mode := AddrMode(uint8(p<<1) | uint8(w^1))
+		return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count}
+
+	case arg_mem_R_pm_imm12_offset:
+		// Treat [<Rn>,#+/-<imm12>] like [<Rn>{,#+/-<imm12>}]{!}
+		// by forcing P=1, W=0 (index=false, wback=false).
+		return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24)
+
+	case arg_mem_R_pm_imm12_postindex:
+		// Treat [<Rn>],#+/-<imm12> like [<Rn>{,#+/-<imm12>}]{!}
+		// by forcing P=0, W=0 (postindex=true).
+		return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21))
+
+	case arg_mem_R_pm_imm12_W:
+		Rn := Reg((x >> 16) & (1<<4 - 1))
+		u := (x >> 23) & 1
+		w := (x >> 21) & 1
+		p := (x >> 24) & 1
+		if p == 0 && w == 1 {
+			return nil
+		}
+		sign := int8(+1)
+		if u == 0 {
+			sign = -1
+		}
+		imm := int16(x & (1<<12 - 1))
+		mode := AddrMode(uint8(p<<1) | uint8(w^1))
+		return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
+
+	case arg_mem_R_pm_imm8_postindex:
+		// Treat [<Rn>],#+/-<imm8> like [<Rn>{,#+/-<imm8>}]{!}
+		// by forcing P=0, W=0 (postindex=true).
+		return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21))
+
+	case arg_mem_R_pm_imm8_W:
+		Rn := Reg((x >> 16) & (1<<4 - 1))
+		u := (x >> 23) & 1
+		w := (x >> 21) & 1
+		p := (x >> 24) & 1
+		if p == 0 && w == 1 {
+			return nil
+		}
+		sign := int8(+1)
+		if u == 0 {
+			sign = -1
+		}
+		// The 8-bit immediate is split: high nibble at bit 8, low nibble at bit 0.
+		imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
+		mode := AddrMode(uint8(p<<1) | uint8(w^1))
+		return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
+
+	case arg_mem_R_pm_imm8at0_offset:
+		Rn := Reg((x >> 16) & (1<<4 - 1))
+		u := (x >> 23) & 1
+		sign := int8(+1)
+		if u == 0 {
+			sign = -1
+		}
+		// The low 8 bits are scaled by 4 to form the offset.
+		imm := int16(x&(1<<8-1)) << 2
+		return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm}
+
+	case arg_option:
+		return Imm(x & (1<<4 - 1))
+
+	case arg_registers:
+		return RegList(x & (1<<16 - 1))
+
+	case arg_registers2:
+		// Register list that must name at least two registers.
+		x &= 1<<16 - 1
+		n := 0
+		for i := 0; i < 16; i++ {
+			if x>>uint(i)&1 != 0 {
+				n++
+			}
+		}
+		if n < 2 {
+			return nil
+		}
+		return RegList(x)
+
+	case arg_registers1:
+		// Single-register list built from the register field at bit 12.
+		Rt := (x >> 12) & (1<<4 - 1)
+		return RegList(1 << Rt)
+
+	case arg_satimm4:
+		return Imm((x >> 16) & (1<<4 - 1))
+
+	case arg_satimm5:
+		return Imm((x >> 16) & (1<<5 - 1))
+
+	// The m1 variants store the value minus one, so add one back.
+	case arg_satimm4m1:
+		return Imm((x>>16)&(1<<4-1) + 1)
+
+	case arg_satimm5m1:
+		return Imm((x>>16)&(1<<5-1) + 1)
+
+	case arg_widthm1:
+		return Imm((x>>16)&(1<<5-1) + 1)
+
+	}
+}
+
+// decodeShift decodes the shift-by-immediate encoded in x: the shift type
+// from bits 5-6 and the count from bits 7-11. A zero count is reinterpreted
+// as 32 for the two right shifts, and as RotateRightExt by 1 for RotateRight.
+func decodeShift(x uint32) (Shift, uint8) {
+	typ := Shift(x >> 5 & 3)
+	count := x >> 7 & 31
+	if count == 0 {
+		switch typ {
+		case ShiftRight, ShiftRightSigned:
+			count = 32
+		case RotateRight:
+			typ, count = RotateRightExt, 1
+		}
+	}
+	return typ, uint8(count)
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/decode_test.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/decode_test.go
@ -0,0 +1,69 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"encoding/hex"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// TestDecode checks Decode and the syntax printers against the cases
+// recorded in testdata/decode.txt. Blank lines and lines starting with #
+// are skipped. Each remaining line has four tab-separated fields (runs of
+// tabs count as one): the hex encoding with a | after the bytes that belong
+// to the instruction, the numeric Mode, the syntax name ("gnu" or "plan9"),
+// and the expected text.
+func TestDecode(t *testing.T) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	all := string(data)
+	for strings.Contains(all, "\t\t") {
+		all = strings.Replace(all, "\t\t", "\t", -1)
+	}
+	for _, line := range strings.Split(all, "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		f := strings.SplitN(line, "\t", 4)
+		// SplitN returns fewer than 4 fields for a malformed line;
+		// report it instead of panicking on f[1], f[2] or f[3] below.
+		if len(f) < 4 {
+			t.Errorf("parsing %q: want 4 tab-separated fields, have %d", line, len(f))
+			continue
+		}
+		i := strings.Index(f[0], "|")
+		if i < 0 {
+			t.Errorf("parsing %q: missing | separator", f[0])
+			continue
+		}
+		if i%2 != 0 {
+			t.Errorf("parsing %q: misaligned | separator", f[0])
+		}
+		// The bytes before the | are the instruction itself.
+		size := i / 2
+		code, err := hex.DecodeString(f[0][:i] + f[0][i+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", f[0], err)
+			continue
+		}
+		mode, err := strconv.Atoi(f[1])
+		if err != nil {
+			t.Errorf("invalid mode %q in: %s", f[1], line)
+			continue
+		}
+		syntax, asm := f[2], f[3]
+		inst, err := Decode(code, Mode(mode))
+		var out string
+		if err != nil {
+			out = "error: " + err.Error()
+		} else {
+			switch syntax {
+			case "gnu":
+				out = GNUSyntax(inst)
+			case "plan9":
+				out = Plan9Syntax(inst, 0, nil, nil)
+			default:
+				t.Errorf("unknown syntax %q", syntax)
+				continue
+			}
+		}
+		if out != asm || inst.Len != size {
+			t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size)
+		}
+	}
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/ext_test.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/ext_test.go
@ -0,0 +1,614 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Support for testing against external disassembler program.
+// Copied and simplified from rsc.io/x86/x86asm/ext_test.go.
+
+package armasm
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"os"
+	"os/exec"
+	"regexp"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+)
+
+// Command-line flags controlling the external-disassembler tests,
+// plus the compile-time debug switch read by writeInst.
+var (
+	printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
+	dumpTest   = flag.Bool("dump", false, "dump all encodings")
+	mismatch   = flag.Bool("mismatch", false, "log allowed mismatches")
+	longTest   = flag.Bool("long", false, "long test")
+	keep       = flag.Bool("keep", false, "keep object files around")
+	debug      = false
+)
+
+// An ExtInst represents a single decoded instruction parsed
+// from an external disassembler's output.
+type ExtInst struct {
+	addr uint32  // address reported for the instruction
+	enc  [4]byte // encoded instruction bytes
+	nenc int     // instruction length; compared with Inst.Len by testExtDis
+	text string  // disassembly text; compared with our own rendering
+}
+
+func (r ExtInst) String() string {
+	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
+}
+
+// An ExtDis is a connection between an external disassembler and a test.
+// testExtDis fills in Arch, Dec, File and Size; Run sets Cmd.
+type ExtDis struct {
+	Arch     Mode
+	Dec      chan ExtInst // parsed instructions, sent by the extdis function
+	File     *os.File     // input file produced by writeInst
+	Size     int          // size value returned by writeInst
+	KeepFile bool
+	Cmd      *exec.Cmd // command started by Run
+}
+
+// Run starts the external disassembler given by cmd (program name followed
+// by its arguments), records it in ext.Cmd, and returns a buffered reader
+// (1 MiB buffer) over its standard output. When the -keep flag is set the
+// command line is logged first.
+func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
+	if *keep {
+		log.Printf("%s\n", strings.Join(cmd, " "))
+	}
+
+	name, args := cmd[0], cmd[1:]
+	ext.Cmd = exec.Command(name, args...)
+
+	stdout, err := ext.Cmd.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("stdoutpipe: %v", err)
+	}
+	err = ext.Cmd.Start()
+	if err != nil {
+		return nil, fmt.Errorf("exec: %v", err)
+	}
+	return bufio.NewReaderSize(stdout, 1<<20), nil
+}
+
+// Wait blocks until the command started by Run has exited
+// and returns the result of its Wait.
+func (ext *ExtDis) Wait() error {
+	err := ext.Cmd.Wait()
+	return err
+}
+
+// testExtDis tests a set of byte sequences against an external disassembler.
+// The disassembler is expected to produce the given syntax and be run
+// in the given architecture mode.
+// The extdis function must start the external disassembler
+// and then parse its output, sending the parsed instructions on ext.Dec.
+// The generate function calls its argument f once for each byte sequence
+// to be tested. The generate function itself will be called twice, and it must
+// make the same sequence of calls to f each time.
+// When a disassembly does not match the internal decoding,
+// allowedMismatch determines whether this mismatch should be
+// allowed, or else considered an error.
+// At most cap(errors) mismatches are kept, as a random sample, and logged.
+func testExtDis(
+	t *testing.T,
+	syntax string,
+	arch Mode,
+	extdis func(ext *ExtDis) error,
+	generate func(f func([]byte)),
+	allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
+) {
+	start := time.Now()
+	ext := &ExtDis{
+		Dec:  make(chan ExtInst),
+		Arch: arch,
+	}
+	errc := make(chan error)
+
+	// First pass: write instructions to input file for external disassembler.
+	file, f, size, err := writeInst(generate)
+	if err != nil {
+		t.Fatal(err)
+	}
+	ext.Size = size
+	ext.File = f
+	defer func() {
+		f.Close()
+		if !*keep {
+			os.Remove(file)
+		}
+	}()
+
+	// Second pass: compare disassembly against our decodings.
+	var (
+		totalTests  = 0
+		totalSkips  = 0
+		totalErrors = 0
+
+		errors = make([]string, 0, 100) // sampled errors, at most cap
+	)
+	go func() {
+		errc <- extdis(ext)
+	}()
+	generate(func(enc []byte) {
+		dec, ok := <-ext.Dec
+		if !ok {
+			t.Errorf("decoding stream ended early")
+			return
+		}
+		inst, text := disasm(syntax, arch, pad(enc))
+		totalTests++
+		if *dumpTest {
+			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
+		}
+		if text != dec.text || inst.Len != dec.nenc {
+			suffix := ""
+			if allowedMismatch(text, size, &inst, dec) {
+				totalSkips++
+				if !*mismatch {
+					return
+				}
+				suffix += " (allowed mismatch)"
+			}
+			totalErrors++
+			// Once the sample is full, either drop this error or evict a
+			// randomly chosen earlier one to make room for it.
+			if len(errors) >= cap(errors) {
+				j := rand.Intn(totalErrors)
+				if j >= cap(errors) {
+					return
+				}
+				errors = append(errors[:j], errors[j+1:]...)
+			}
+			errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
+		}
+	})
+
+	// With -mismatch, allowed mismatches were counted (and logged) as
+	// errors above; take them back out before deciding pass/fail.
+	if *mismatch {
+		totalErrors -= totalSkips
+	}
+
+	for _, b := range errors {
+		t.Log(b)
+	}
+
+	if totalErrors > 0 {
+		t.Fail()
+	}
+	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
+
+	// Fatalf, not Fatal: the message contains a formatting verb.
+	if err := <-errc; err != nil {
+		t.Fatalf("external disassembler: %v", err)
+	}
+
+}
+
+const start = 0x8000 // start address of text
+
+// writeInst writes the generated byte sequences to a new temporary file
+// starting at offset start. That file is intended to be the input to
+// the external disassembler. Each sequence is truncated to 4 bytes or
+// zero-padded up to 4 bytes, so every case occupies exactly len(zeros) bytes.
+//
+// It returns the file name, the still-open file, and the number of bytes
+// written. If seeking or writing fails, the temporary file is closed and
+// removed and a non-nil error is returned.
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
+	f, err = ioutil.TempFile("", "armasm")
+	if err != nil {
+		return "", nil, 0, err
+	}
+	file = f.Name()
+
+	// Do not leave the temporary file behind if anything below fails.
+	defer func() {
+		if err != nil {
+			f.Close()
+			os.Remove(file)
+			file, f, size = "", nil, 0
+		}
+	}()
+
+	if _, err = f.Seek(start, 0); err != nil {
+		return file, f, size, err
+	}
+
+	w := bufio.NewWriter(f)
+	generate(func(x []byte) {
+		if len(x) > 4 {
+			x = x[:4]
+		}
+		if debug {
+			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
+		}
+		// A bufio.Writer remembers its first write error and reports it
+		// from Flush, so the individual Write results need no checking.
+		w.Write(x)
+		w.Write(zeros[len(x):])
+		size += len(zeros)
+	})
+	err = w.Flush()
+	return file, f, size, err
+}
+
+// zeros is the padding appended to encodings shorter than 4 bytes.
+var zeros = []byte{0, 0, 0, 0}
+
+// pad returns enc extended with zero bytes to a length of 4.
+// Encodings that already have 4 or more bytes are returned unchanged.
+// The padded result never shares enc's spare capacity.
+func pad(enc []byte) []byte {
+	n := len(enc)
+	if n >= 4 {
+		return enc
+	}
+	// The full slice expression caps capacity at n, forcing append to copy.
+	return append(enc[:n:n], zeros[:4-n]...)
+}
+
+// disasm returns the decoded instruction and text
+// for the given source bytes, using the given syntax and mode.
+// A decoding failure yields the text "error: " followed by the error;
+// a syntax other than "gnu" yields "error: unknown syntax " plus its name.
+//
+// When the -printtests flag is set, disasm measures coverage() before and
+// after decoding and, if it went up, prints the input as a line in the
+// format used by testdata/decode.txt. This produces a fairly small set of
+// test cases that exercise nearly all the code.
+func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) {
+	var cover float64
+	if *printTests {
+		cover = -coverage()
+	}
+
+	var err error
+	inst, err = Decode(src, mode)
+	if err != nil {
+		text = "error: " + err.Error()
+	} else {
+		// Generic rendering; both branches below replace it.
+		// ("arm" and "plan9" syntaxes are not wired up here.)
+		text = inst.String()
+		if syntax == "gnu" {
+			text = GNUSyntax(inst)
+		} else {
+			text = "error: unknown syntax " + syntax
+		}
+	}
+
+	if *printTests {
+		cover += coverage()
+		if cover > 0 {
+			end := len(src)
+			if end > 4 && inst.Len <= 4 {
+				end = 4
+			}
+			fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:end], mode, syntax, text)
+		}
+	}
+
+	return inst, text
+}
+
+// coverage returns a floating point number denoting the
+// test coverage until now. The number increases when new code paths are exercised,
+// both in the Go program and in the decoder byte code.
+func coverage() float64 {
+	/*
+		testing.Coverage is not in the main distribution.
+		The implementation, which must go in package testing, is:
+
+		// Coverage reports the current code coverage as a fraction in the range [0, 1].
+		func Coverage() float64 {
+			var n, d int64
+			for _, counters := range cover.Counters {
+				for _, c := range counters {
+					if c > 0 {
+						n++
+					}
+					d++
+				}
+			}
+			if d == 0 {
+				return 0
+			}
+			return float64(n) / float64(d)
+		}
+	*/
+
+	var f float64
+	f += testing.Coverage()
+	f += decodeCoverage()
+	return f
+}
+
+// decodeCoverage returns the fraction of decoderCover entries that are set.
+// One is added to both numerator and denominator, so the result is 1
+// (rather than a division by zero) while decoderCover is still empty.
+func decodeCoverage() float64 {
+	hit := 1
+	for i := range decoderCover {
+		if decoderCover[i] {
+			hit++
+		}
+	}
+	return float64(hit) / float64(1+len(decoderCover))
+}
+
+// Helpers for writing disassembler output parsers.
+
+// hasPrefix reports whether any of the space-separated words in the text s
+// begins with any of the given prefixes. An empty s never matches.
+func hasPrefix(s string, prefixes ...string) bool {
+	for _, prefix := range prefixes {
+		// rest always starts at the beginning of a word.
+		rest := s
+		for rest != "" {
+			if strings.HasPrefix(rest, prefix) {
+				return true
+			}
+			sp := strings.IndexByte(rest, ' ')
+			if sp < 0 {
+				break
+			}
+			rest = rest[sp+1:]
+		}
+	}
+	return false
+}
+
+// contains reports whether the text s contains any of the given substrings.
+// With no substrings it reports false.
+func contains(s string, substrings ...string) bool {
+	for i := 0; i < len(substrings); i++ {
+		if strings.Contains(s, substrings[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
+func isHex(b byte) bool {
+	if b == '0' {
+		// unhex maps '0' to 0, the same value it holds for non-hex bytes.
+		return true
+	}
+	return unhex[b] > 0
+}
+
+// parseHex parses the hexadecimal byte dump in hex,
+// appending the parsed bytes to raw and returning the updated slice.
+// The returned bool is true when the whole dump was valid; on invalid
+// input parseHex returns nil, false.
+// Spaces and tabs between bytes are okay but any other non-hex is not,
+// and every byte must be written as exactly two adjacent hex digits.
+func parseHex(hex []byte, raw []byte) ([]byte, bool) {
+	hex = trimSpace(hex)
+	j := 0
+	for j < len(hex) {
+		// Skip separators one at a time.
+		if c := hex[j]; c == ' ' || c == '\t' {
+			j++
+			continue
+		}
+		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
+			return nil, false
+		}
+		hi, lo := unhex[hex[j]], unhex[hex[j+1]]
+		raw = append(raw, hi<<4|lo)
+		j += 2
+	}
+	return raw, true
+}
+
+// unhex maps an ASCII hexadecimal digit to its value.
+// Every other byte maps to 0, which is also the value of '0';
+// isHex special-cases '0' for that reason.
+var unhex = [256]byte{
+	'0': 0,
+	'1': 1,
+	'2': 2,
+	'3': 3,
+	'4': 4,
+	'5': 5,
+	'6': 6,
+	'7': 7,
+	'8': 8,
+	'9': 9,
+	'A': 10,
+	'B': 11,
+	'C': 12,
+	'D': 13,
+	'E': 14,
+	'F': 15,
+	'a': 10,
+	'b': 11,
+	'c': 12,
+	'd': 13,
+	'e': 14,
+	'f': 15,
+}
+
+// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
+// It returns the offset of the first occurrence of t in s, or -1 if there
+// is none. As with bytes.Index, an empty t is found at offset 0.
+func index(s []byte, t string) int {
+	if len(t) == 0 {
+		// t[0] below would panic; bytes.Index reports 0 for an empty needle.
+		return 0
+	}
+	for i := 0; ; i++ {
+		// Jump to the next candidate: an occurrence of t's first byte.
+		j := bytes.IndexByte(s[i:], t[0])
+		if j < 0 {
+			return -1
+		}
+		i += j
+		if i+len(t) > len(s) {
+			return -1
+		}
+		// The compiler does not allocate for a conversion used only in a comparison.
+		if string(s[i:i+len(t)]) == t {
+			return i
+		}
+	}
+}
+
+// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
+// Leading and trailing space is removed first (see trimSpace).
+// If s must be rewritten, it is rewritten in place; the result is always
+// a subslice of s.
+func fixSpace(s []byte) []byte {
+	s = trimSpace(s)
+
+	// Fast path: nothing to do unless there is a tab, a newline,
+	// or two spaces in a row.
+	dirty := false
+	for i, c := range s {
+		if c == '\t' || c == '\n' || (c == ' ' && i > 0 && s[i-1] == ' ') {
+			dirty = true
+			break
+		}
+	}
+	if !dirty {
+		return s
+	}
+
+	// Compact in place. out never grows past the read position,
+	// so the appends reuse s's backing array.
+	out := s[:0]
+	for _, c := range s {
+		if c == '\t' || c == '\n' {
+			c = ' '
+		}
+		if c == ' ' && len(out) > 0 && out[len(out)-1] == ' ' {
+			continue
+		}
+		out = append(out, c)
+	}
+	if n := len(out); n > 0 && out[n-1] == ' ' {
+		out = out[:n-1]
+	}
+	return out
+}
+
+// trimSpace trims leading and trailing space from s, returning a subslice of s.
+// Spaces, tabs and newlines are removed from the end; only spaces and tabs
+// are removed from the start.
+func trimSpace(s []byte) []byte {
+	end := len(s)
+	for ; end > 0; end-- {
+		if c := s[end-1]; c != ' ' && c != '\t' && c != '\n' {
+			break
+		}
+	}
+	begin := 0
+	for begin < end {
+		if c := s[begin]; c != ' ' && c != '\t' {
+			break
+		}
+		begin++
+	}
+	return s[begin:end]
+}
+
+// pcrel matches instructions using relative addressing mode.
+// It accepts text ending in a b/bl (optionally x) mnemonic with an optional
+// condition suffix, followed by a lower-case hexadecimal 0x target.
+// Group 1 is everything up to and including the mnemonic; group 2 is the
+// target's hex digits.
+var (
+	pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`)
+)
+
+// Generators.
+//
+// The test cases are described as functions that invoke a callback repeatedly,
+// with a new input sequence each time. These helpers make writing those
+// a little easier.
+
+// condCases generates conditional instructions: encodings whose top
+// nibble (the condition) is in the range 0-14.
+// The number of cases depends on testing.Short and the -long flag.
+func condCases(t *testing.T) func(func([]byte)) {
+	return func(try func([]byte)) {
+		// All the strides are odd and therefore relatively prime to 2²⁸,
+		// so no instruction repeats until all 2²⁸ have been tried.
+		// A stride other than 1 visits the instructions in a pseudorandom
+		// order, which gives better variety in the set of test cases
+		// chosen by -printtests.
+		stride, n := uint32(10007), 1<<28/7
+		switch {
+		case testing.Short():
+			stride, n = 100003, 1<<28/1001
+		case *longTest:
+			stride, n = 200000033, 1<<28
+		}
+
+		for i, x := 0, uint32(0); i < n; i, x = i+1, x+stride {
+			// Low 28 bits come from x; the condition is x mod 15.
+			enc := (x%15)<<28 | x&(1<<28-1)
+			try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
+		}
+	}
+}
+
+// uncondCases generates unconditional instructions: the condCases
+// sequence with the top nibble of every encoding forced to 0xF.
+func uncondCases(t *testing.T) func(func([]byte)) {
+	conditional := condCases(t)
+	return func(try func([]byte)) {
+		conditional(func(enc []byte) {
+			enc[3] |= 0xF0
+			try(enc)
+		})
+	}
+}
+
+// countBits returns the number of 1 bits in x.
+func countBits(x uint32) int {
+	n := 0
+	for x != 0 {
+		x &= x - 1 // clear the lowest set bit
+		n++
+	}
+	return n
+}
+
+// expandBits scatters the low bits of x into the bit positions that are
+// set in m: the lowest set position of m receives bit 0 of x, the next
+// set position receives bit 1, and so on. Positions clear in m are 0.
+func expandBits(x, m uint32) uint32 {
+	var out uint32
+	for pos := uint(0); pos < 32; pos++ {
+		if m>>pos&1 == 0 {
+			continue
+		}
+		out |= (x & 1) << pos
+		x >>= 1
+	}
+	return out
+}
+
+func tryCondMask(mask, val uint32, try func([]byte)) {
+	n := countBits(^mask)
+	bits := uint32(0)
+	for i := 0; i < 1<<uint(n); i++ {
+		bits += 848251 // arbitrary prime
+		x := val | expandBits(bits, ^mask) | uint32(i)%15<<28
+		try([]byte{byte(x), byte(x >> 8), byte(x >> 16), byte(x >> 24)})
+	}
+}
+
+// vfpCases generates VFP instructions by running tryCondMask over
+// four fixed mask/value pairs.
+func vfpCases(t *testing.T) func(func([]byte)) {
+	const (
+		vfpmask uint32 = 0xFF00FE10
+		vfp     uint32 = 0x0E009A00
+	)
+	spaces := [...]struct{ mask, val uint32 }{
+		{vfpmask, vfp},           // standard VFP instruction space
+		{0xffc00f7f, 0x0e000b10}, // VFP MOV core reg to/from float64 half
+		{0xffe00f7f, 0x0e000a10}, // VFP MOV core reg to/from float32
+		{0xffef0fff, 0x0ee10a10}, // VFP MOV core reg to/from cond codes
+	}
+	return func(try func([]byte)) {
+		for _, sp := range spaces {
+			tryCondMask(sp.mask, sp.val, try)
+		}
+	}
+}
+
+// hexCases generates the cases written in hexadecimal in the encoded string.
+// Spaces in 'encoded' separate entire test cases, not individual bytes.
+// A case that is not valid hexadecimal is reported with t.Errorf and
+// skipped, so try is only ever called with successfully parsed bytes.
+func hexCases(t *testing.T, encoded string) func(func([]byte)) {
+	return func(try func([]byte)) {
+		for _, x := range strings.Fields(encoded) {
+			src, err := hex.DecodeString(x)
+			if err != nil {
+				t.Errorf("parsing %q: %v", x, err)
+				// Do not feed a nil encoding to try.
+				continue
+			}
+			try(src)
+		}
+	}
+}
+
+// testdataCases generates the test cases recorded in testdata/decode.txt.
+// It only uses the inputs; it ignores the answers recorded in that file.
+// The file is read once, when testdataCases is called; the returned
+// generator replays the same encodings on every invocation.
+func testdataCases(t *testing.T) func(func([]byte)) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var codes [][]byte
+	for _, line := range strings.Split(string(data), "\n") {
+		if line = strings.TrimSpace(line); line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		// Only the first field, the "bytes|bytes" encoding, is used.
+		word := strings.Fields(line)[0]
+		sep := strings.Index(word, "|")
+		switch {
+		case sep < 0:
+			t.Errorf("parsing %q: missing | separator", word)
+			continue
+		case sep%2 != 0:
+			t.Errorf("parsing %q: misaligned | separator", word)
+		}
+		code, err := hex.DecodeString(word[:sep] + word[sep+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", word, err)
+			continue
+		}
+		codes = append(codes, code)
+	}
+
+	return func(try func([]byte)) {
+		for i := range codes {
+			try(codes[i])
+		}
+	}
+}
+
+// caller returns the unqualified name of the function skip frames up the
+// call stack (skip is passed to runtime.Caller, so 1 names the function
+// that called caller). It returns "?" if the function cannot be identified.
+func caller(skip int) string {
+	pc, _, _, _ := runtime.Caller(skip)
+	fn := runtime.FuncForPC(pc)
+	if fn == nil {
+		return "?"
+	}
+	name := fn.Name()
+	// Drop everything up to the last dot (package path, receiver).
+	if dot := strings.LastIndex(name, "."); dot >= 0 {
+		return name[dot+1:]
+	}
+	return name
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/gnu.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/gnu.go
@ -0,0 +1,164 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+)
+
+// saveDot rewrites the dotted suffixes listed below into "_dot_"
+// placeholders. GNUSyntax applies it before deleting every remaining "."
+// from the opcode name and then turns "_dot_" back into ".", so only
+// these suffixes keep their dot. Note that ".FXS" and ".FXU" are
+// shortened to "_dot_S" and "_dot_U".
+var saveDot = strings.NewReplacer(
+	".F16", "_dot_F16",
+	".F32", "_dot_F32",
+	".F64", "_dot_F64",
+	".S32", "_dot_S32",
+	".U32", "_dot_U32",
+	".FXS", "_dot_S",
+	".FXU", "_dot_U",
+	".32", "_dot_32",
+)
+
+// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
+// This form typically matches the syntax defined in the ARM Reference Manual.
+//
+// The mnemonic is the opcode name lower-cased with its dots removed,
+// except for the suffixes preserved by saveDot. Arguments are formatted by
+// gnuArg; arguments that format to the empty string are omitted.
+func GNUSyntax(inst Inst) string {
+	mnemonic := saveDot.Replace(inst.Op.String())
+	mnemonic = strings.Replace(mnemonic, ".", "", -1)
+	mnemonic = strings.Replace(mnemonic, "_dot_", ".", -1)
+
+	var out bytes.Buffer
+	out.WriteString(strings.ToLower(mnemonic))
+
+	printed := 0
+	for idx, a := range inst.Args {
+		if a == nil {
+			// Unused trailing slots are nil.
+			break
+		}
+		s := gnuArg(&inst, idx, a)
+		if s == "" {
+			continue
+		}
+		if printed == 0 {
+			out.WriteString(" ")
+		} else {
+			out.WriteString(", ")
+		}
+		printed++
+		out.WriteString(s)
+	}
+	return out.String()
+}
+
+// gnuArg formats one instruction argument in GNU syntax.
+// argIndex is the position of arg within inst.Args, or -1 when gnuArg is
+// called recursively for a component of a larger argument. An empty
+// result means the argument is not printed at all.
+func gnuArg(inst *Inst, argIndex int, arg Arg) string {
+	// Clear the low four bits of the opcode so that every variant
+	// compares equal to its _EQ constant.
+	baseOp := inst.Op &^ 15
+
+	// The second register of a consecutive pair is not printed.
+	switch {
+	case argIndex == 1 && (baseOp == LDRD_EQ || baseOp == LDREXD_EQ || baseOp == STRD_EQ):
+		return ""
+	case argIndex == 2 && baseOp == STREXD_EQ:
+		return ""
+	}
+
+	switch v := arg.(type) {
+	case Imm:
+		if baseOp == BKPT_EQ {
+			return fmt.Sprintf("%#04x", uint32(v))
+		}
+		if baseOp == SVC_EQ {
+			return fmt.Sprintf("%#08x", uint32(v))
+		}
+		return fmt.Sprintf("#%d", int32(v))
+
+	case ImmAlt:
+		return fmt.Sprintf("#%d, %d", v.Val, v.Rot)
+
+	case Mem:
+		base := gnuArg(inst, -1, v.Base)
+
+		// index is either an immediate offset or a signed,
+		// optionally shifted index register.
+		var index string
+		if v.Sign == 0 {
+			index = fmt.Sprintf("#%d", v.Offset)
+		} else {
+			if v.Sign < 0 {
+				index = "-"
+			}
+			index += gnuArg(inst, -1, v.Index)
+			switch {
+			case v.Shift == ShiftLeft && v.Count == 0:
+				// no shift to print
+			case v.Shift == RotateRightExt:
+				index += ", rrx"
+			default:
+				index += fmt.Sprintf(", %s #%d", strings.ToLower(v.Shift.String()), v.Count)
+			}
+		}
+
+		switch v.Mode {
+		case AddrOffset:
+			if index == "#0" {
+				return fmt.Sprintf("[%s]", base)
+			}
+			return fmt.Sprintf("[%s, %s]", base, index)
+		case AddrPreIndex:
+			return fmt.Sprintf("[%s, %s]!", base, index)
+		case AddrPostIndex:
+			return fmt.Sprintf("[%s], %s", base, index)
+		case AddrLDM:
+			if index == "#0" {
+				return base
+			}
+		case AddrLDM_WB:
+			if index == "#0" {
+				return base + "!"
+			}
+		}
+		// Unknown mode, or an LDM form with a non-zero index.
+		return fmt.Sprintf("[%s Mode(%d) %s]", base, int(v.Mode), index)
+
+	case PCRel:
+		return fmt.Sprintf(".%+#x", int32(v)+4)
+
+	case Reg:
+		if baseOp == LDREX_EQ && argIndex == 0 {
+			return fmt.Sprintf("r%d", int32(v))
+		}
+		switch v {
+		case R10:
+			return "sl"
+		case R11:
+			return "fp"
+		case R12:
+			return "ip"
+		}
+		// Other registers use the generic lower-cased name below.
+
+	case RegList:
+		var names []string
+		for bit := 0; bit < 16; bit++ {
+			if v&(1<<uint(bit)) == 0 {
+				continue
+			}
+			names = append(names, gnuArg(inst, -1, Reg(bit)))
+		}
+		return "{" + strings.Join(names, ", ") + "}"
+
+	case RegShift:
+		reg := gnuArg(inst, -1, v.Reg)
+		switch {
+		case v.Shift == ShiftLeft && v.Count == 0:
+			return reg
+		case v.Shift == RotateRightExt:
+			return reg + ", rrx"
+		}
+		return fmt.Sprintf("%s, %s #%d", reg, strings.ToLower(v.Shift.String()), v.Count)
+
+	case RegShiftReg:
+		return fmt.Sprintf("%s, %s %s", gnuArg(inst, -1, v.Reg), strings.ToLower(v.Shift.String()), gnuArg(inst, -1, v.RegCount))
+	}
+
+	return strings.ToLower(arg.String())
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/inst.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/inst.go
@ -0,0 +1,438 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// A Mode is an instruction execution mode.
+// The zero value is not a valid mode.
+type Mode int
+
+const (
+	_ Mode = iota
+	ModeARM
+	ModeThumb
+)
+
+// String returns "ARM" or "Thumb" for the known modes and
+// "Mode(n)" for any other value.
+func (m Mode) String() string {
+	if m == ModeARM {
+		return "ARM"
+	}
+	if m == ModeThumb {
+		return "Thumb"
+	}
+	return fmt.Sprintf("Mode(%d)", int(m))
+}
+
+// An Op is an ARM opcode.
+type Op uint16
+
+// NOTE: The actual Op values are defined in tables.go.
+// They are chosen to simplify instruction decoding and
+// are not a dense packing from 0 to N, although the
+// density is high, probably at least 90%.
+
+// String returns the name recorded for op in opstr, or "Op(n)" when
+// op is out of range or has no name.
+func (op Op) String() string {
+	if int(op) < len(opstr) {
+		if name := opstr[op]; name != "" {
+			return name
+		}
+	}
+	return fmt.Sprintf("Op(%d)", int(op))
+}
+
+// An Inst is a single instruction.
+type Inst struct {
+	Op   Op     // Opcode mnemonic
+	Enc  uint32 // Raw encoding bits.
+	Len  int    // Length of encoding in bytes.
+	Args Args   // Instruction arguments, in ARM manual order.
+}
+
+// String returns the opcode name followed by the non-nil arguments:
+// a space before the first argument and ", " between the rest.
+func (i Inst) String() string {
+	var out bytes.Buffer
+	out.WriteString(i.Op.String())
+	sep := " "
+	for _, a := range i.Args {
+		if a == nil {
+			break
+		}
+		out.WriteString(sep)
+		out.WriteString(a.String())
+		sep = ", "
+	}
+	return out.String()
+}
+
+// An Args holds the instruction arguments.
+// If an instruction has fewer than 4 arguments,
+// the final elements in the array are nil.
+type Args [4]Arg
+
+// An Arg is a single instruction argument, one of these types:
+// Endian, Float32Imm, Float64Imm, Imm, ImmAlt, Label, Mem, PCRel,
+// Reg, RegList, RegShift, RegShiftReg, RegX.
+type Arg interface {
+	// IsArg is a marker method; the implementations in this file
+	// have empty bodies.
+	IsArg()
+	// String returns a textual form of the argument.
+	String() string
+}
+
+// A Float32Imm is a single-precision floating-point constant.
+type Float32Imm float32
+
+func (Float32Imm) IsArg() {}
+
+// String returns the value in %v form prefixed by "#".
+func (f Float32Imm) String() string {
+	return "#" + fmt.Sprint(float32(f))
+}
+
+// A Float64Imm is a double-precision floating-point constant.
+// Its underlying type is float64 so that a 64-bit immediate keeps its
+// full precision; with a float32 underlying type the value would be
+// rounded to single precision and String would print the rounding noise.
+type Float64Imm float64
+
+func (Float64Imm) IsArg() {}
+
+// String returns the value in %v form prefixed by "#".
+func (f Float64Imm) String() string {
+	return fmt.Sprintf("#%v", float64(f))
+}
+
+// An Imm is an integer constant.
+type Imm uint32
+
+func (Imm) IsArg() {}
+
+// String returns the constant in 0x-prefixed hexadecimal, preceded by "#".
+func (i Imm) String() string {
+	return "#" + fmt.Sprintf("%#x", uint32(i))
+}
+
+// An ImmAlt is an alternate encoding of an integer constant:
+// the 8-bit value Val rotated right by Rot bits.
+type ImmAlt struct {
+	Val uint8
+	Rot uint8
+}
+
+func (ImmAlt) IsArg() {}
+
+// Imm returns the 32-bit constant obtained by rotating Val right by Rot bits.
+func (i ImmAlt) Imm() Imm {
+	val := uint32(i.Val)
+	rot := uint(i.Rot)
+	lo := val >> rot
+	hi := val << (32 - rot)
+	return Imm(lo | hi)
+}
+
+// String returns Val in hexadecimal and Rot in decimal, as "#0xVV, R".
+func (i ImmAlt) String() string {
+	return fmt.Sprintf("#%#x, %d", i.Val, i.Rot)
+}
+
+// A Label is a text (code) address.
+type Label uint32
+
+func (Label) IsArg() {}
+
+// String returns the address in 0x-prefixed hexadecimal.
+func (i Label) String() string {
+	addr := uint32(i)
+	return fmt.Sprintf("%#x", addr)
+}
+
+// A Reg is a single register.
+// The zero value denotes R0, not the absence of a register.
+type Reg uint8
+
+// Register numbering: R0-R15, then S0-S31, then D0-D31, then the
+// status registers. SP, LR and PC are aliases for R13, R14 and R15.
+const (
+	R0 Reg = iota
+	R1
+	R2
+	R3
+	R4
+	R5
+	R6
+	R7
+	R8
+	R9
+	R10
+	R11
+	R12
+	R13
+	R14
+	R15
+
+	S0
+	S1
+	S2
+	S3
+	S4
+	S5
+	S6
+	S7
+	S8
+	S9
+	S10
+	S11
+	S12
+	S13
+	S14
+	S15
+	S16
+	S17
+	S18
+	S19
+	S20
+	S21
+	S22
+	S23
+	S24
+	S25
+	S26
+	S27
+	S28
+	S29
+	S30
+	S31
+
+	D0
+	D1
+	D2
+	D3
+	D4
+	D5
+	D6
+	D7
+	D8
+	D9
+	D10
+	D11
+	D12
+	D13
+	D14
+	D15
+	D16
+	D17
+	D18
+	D19
+	D20
+	D21
+	D22
+	D23
+	D24
+	D25
+	D26
+	D27
+	D28
+	D29
+	D30
+	D31
+
+	APSR
+	APSR_nzcv
+	FPSCR
+
+	SP = R13
+	LR = R14
+	PC = R15
+)
+
+func (Reg) IsArg() {}
+
+// String returns the register name: the special names APSR, APSR_nzcv,
+// FPSCR, SP, LR and PC where they apply, otherwise Rn, Sn or Dn, and
+// "Reg(n)" for values outside the defined registers.
+func (r Reg) String() string {
+	switch {
+	case r == APSR:
+		return "APSR"
+	case r == APSR_nzcv:
+		return "APSR_nzcv"
+	case r == FPSCR:
+		return "FPSCR"
+	case r == SP:
+		return "SP"
+	case r == PC:
+		return "PC"
+	case r == LR:
+		return "LR"
+	case r <= R15:
+		// R0 is zero, so no lower bound is needed.
+		return fmt.Sprintf("R%d", int(r-R0))
+	case r <= S31:
+		return fmt.Sprintf("S%d", int(r-S0))
+	case r <= D31:
+		return fmt.Sprintf("D%d", int(r-D0))
+	}
+	return fmt.Sprintf("Reg(%d)", int(r))
+}
+
+// A RegX represents a fraction of a multi-value register.
+// The Index field specifies the index number,
+// but the size of the fraction is not specified.
+// It must be inferred from the instruction and the register type.
+// For example, in a VMOV instruction, RegX{D5, 1} represents
+// the top 32 bits of the 64-bit D5 register.
+type RegX struct {
+	Reg   Reg
+	Index int
+}
+
+func (RegX) IsArg() {}
+
+// String returns the register name followed by the index in brackets.
+func (r RegX) String() string {
+	name := r.Reg.String()
+	return fmt.Sprintf("%s[%d]", name, r.Index)
+}
+
+// A RegList is a register list.
+// Bits at indexes x = 0 through 15 indicate whether the corresponding Rx register is in the list.
+type RegList uint16
+
+func (RegList) IsArg() {}
+
+// String returns the listed registers in ascending order, separated by
+// commas (no spaces) and enclosed in braces.
+func (r RegList) String() string {
+	var out bytes.Buffer
+	out.WriteString("{")
+	first := true
+	for bit := 0; bit < 16; bit++ {
+		if r&(1<<uint(bit)) == 0 {
+			continue
+		}
+		if !first {
+			out.WriteString(",")
+		}
+		first = false
+		out.WriteString(Reg(bit).String())
+	}
+	out.WriteString("}")
+	return out.String()
+}
+
+// An Endian is the argument to the SETEND instruction.
+type Endian uint8
+
+const (
+	LittleEndian Endian = 0
+	BigEndian    Endian = 1
+)
+
+func (Endian) IsArg() {}
+
+// String returns "LE" for LittleEndian and "BE" for any other value.
+func (e Endian) String() string {
+	if e == LittleEndian {
+		return "LE"
+	}
+	return "BE"
+}
+
+// A Shift describes an ARM shift operation.
+type Shift uint8
+
+const (
+	ShiftLeft        Shift = 0 // left shift
+	ShiftRight       Shift = 1 // logical (unsigned) right shift
+	ShiftRightSigned Shift = 2 // arithmetic (signed) right shift
+	RotateRight      Shift = 3 // right rotate
+	RotateRightExt   Shift = 4 // right rotate through carry (Count will always be 1)
+)
+
+// shiftName holds the mnemonic for each Shift value, indexed by the value.
+var shiftName = [...]string{
+	"LSL", "LSR", "ASR", "ROR", "RRX",
+}
+
+// String returns the shift mnemonic, or "Shift(n)" for an unknown value.
+func (s Shift) String() string {
+	if int(s) >= len(shiftName) {
+		return fmt.Sprintf("Shift(%d)", int(s))
+	}
+	return shiftName[s]
+}
+
+// A RegShift is a register shifted by a constant.
+type RegShift struct {
+	Reg   Reg
+	Shift Shift
+	Count uint8
+}
+
+func (RegShift) IsArg() {}
+
+// String returns "reg shift #count".
+func (r RegShift) String() string {
+	reg, op := r.Reg.String(), r.Shift.String()
+	return fmt.Sprintf("%s %s #%d", reg, op, r.Count)
+}
+
+// A RegShiftReg is a register shifted by a register.
+type RegShiftReg struct {
+	Reg      Reg
+	Shift    Shift
+	RegCount Reg
+}
+
+func (RegShiftReg) IsArg() {}
+
+// String returns "reg shift countreg".
+func (r RegShiftReg) String() string {
+	return r.Reg.String() + " " + r.Shift.String() + " " + r.RegCount.String()
+}
+
+// A PCRel describes a memory address (usually a code label)
+// as a distance relative to the program counter.
+// TODO(rsc): Define which program counter (PC+4? PC+8? PC?).
+type PCRel int32
+
+func (PCRel) IsArg() {}
+
+// String returns "PC" followed by the signed offset in hexadecimal.
+func (r PCRel) String() string {
+	return "PC" + fmt.Sprintf("%+#x", int32(r))
+}
+
+// An AddrMode is an ARM addressing mode.
+// The zero value is not a valid mode.
+type AddrMode uint8
+
+const (
+	_             AddrMode = iota
+	AddrPostIndex          // [R], X - use address R, set R = R + X
+	AddrPreIndex           // [R, X]! - use address R + X, set R = R + X
+	AddrOffset             // [R, X] - use address R + X
+	AddrLDM                // R - [R] but formats as R, for LDM/STM only
+	AddrLDM_WB             // R! - [R], X where X is instruction-specific amount, for LDM/STM only
+)
+
+// A Mem is a memory reference made up of a base R and index expression X.
+// The effective memory address is R or R+X depending on AddrMode.
+// The index expression is X = Sign*(Index Shift Count) + Offset,
+// but in any instruction either Sign = 0 or Offset = 0.
+type Mem struct {
+	Base   Reg
+	Mode   AddrMode
+	Sign   int8
+	Index  Reg
+	Shift  Shift
+	Count  uint8
+	Offset int16
+}
+
+func (Mem) IsArg() {}
+
+// String formats the reference according to its addressing mode.
+// A mode it does not recognize, or an LDM mode with a non-zero index,
+// is printed in the diagnostic form "[R Mode(n) X]".
+func (m Mem) String() string {
+	base := m.Base.String()
+
+	// index is an immediate offset when Sign is zero, otherwise a signed
+	// index register with an optional shift.
+	var index string
+	if m.Sign == 0 {
+		index = fmt.Sprintf("#%d", m.Offset)
+	} else {
+		index = "+"
+		if m.Sign < 0 {
+			index = "-"
+		}
+		index += m.Index.String()
+		if !(m.Shift == ShiftLeft && m.Count == 0) {
+			index += fmt.Sprintf(", %s #%d", m.Shift, m.Count)
+		}
+	}
+	zero := index == "#0"
+
+	switch m.Mode {
+	case AddrOffset:
+		if zero {
+			return fmt.Sprintf("[%s]", base)
+		}
+		return fmt.Sprintf("[%s, %s]", base, index)
+	case AddrPreIndex:
+		return fmt.Sprintf("[%s, %s]!", base, index)
+	case AddrPostIndex:
+		return fmt.Sprintf("[%s], %s", base, index)
+	case AddrLDM:
+		if zero {
+			return base
+		}
+	case AddrLDM_WB:
+		if zero {
+			return base + "!"
+		}
+	}
+	return fmt.Sprintf("[%s Mode(%d) %s]", base, int(m.Mode), index)
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/objdump_test.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/objdump_test.go
@ -0,0 +1,258 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"encoding/binary"
+	"strings"
+	"testing"
+)
+
+// TestObjdumpARMTestdata runs the objdump comparison on the inputs from testdataCases.
+func TestObjdumpARMTestdata(t *testing.T) {
+	testObjdumpARM(t, testdataCases(t))
+}
+
+// TestObjdumpARMManual runs the objdump comparison on objdumpManualTests.
+func TestObjdumpARMManual(t *testing.T) {
+	testObjdumpARM(t, hexCases(t, objdumpManualTests))
+}
+
+// TestObjdumpARMCond runs the objdump comparison on the inputs from condCases.
+func TestObjdumpARMCond(t *testing.T) {
+	testObjdumpARM(t, condCases(t))
+}
+
+// TestObjdumpARMUncond runs the objdump comparison on the inputs from uncondCases.
+func TestObjdumpARMUncond(t *testing.T) {
+	testObjdumpARM(t, uncondCases(t))
+}
+
+// TestObjdumpARMVFP runs the objdump comparison on the inputs from vfpCases.
+func TestObjdumpARMVFP(t *testing.T) {
+	testObjdumpARM(t, vfpCases(t))
+}
+
+// objdumpManualTests holds test cases that will be run by TestObjdumpARMManual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run=Manual, particularly with tracing enabled.
+// Note that these are byte sequences, so they must be reversed from the usual
+// word presentation. The string is parsed by hexCases: each
+// whitespace-separated field is one complete test case.
+var objdumpManualTests = `
+00000000
+`
+
+// allowedMismatchObjdump reports whether the mismatch between text and dec
+// should be allowed by the test.
+// text is our rendering of the instruction and dec holds the rendering
+// (dec.text) and encoding bytes (dec.enc) obtained from objdump.
+// Each clause below documents one known disagreement, identified by an
+// example instruction word. The size parameter is not used.
+func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool {
+	if hasPrefix(text, "error:") {
+		if hasPrefix(dec.text, unsupported...) || strings.Contains(dec.text, "invalid:") || strings.HasSuffix(dec.text, "^") || strings.Contains(dec.text, "f16.f64") || strings.Contains(dec.text, "f64.f16") {
+			return true
+		}
+		// word 4320F02C: libopcodes says 'nopmi {44}'.
+		if hasPrefix(dec.text, "nop") && strings.Contains(dec.text, "{") {
+			return true
+		}
+	}
+
+	if hasPrefix(dec.text, "error:") && text == "undef" && inst.Enc == 0xf7fabcfd {
+		return true
+	}
+
+	// word 00f02053: libopcodes says 'noppl {0}'.
+	if hasPrefix(dec.text, "nop") && hasPrefix(text, "nop") && dec.text == text+" {0}" {
+		return true
+	}
+
+	// word F57FF04F. we say 'dsb #15', libopcodes says 'dsb sy'.
+	if hasPrefix(text, "dsb") && hasPrefix(dec.text, "dsb") {
+		return true
+	}
+	// word F57FF06F. we say 'isb #15', libopcodes says 'isb sy'.
+	if hasPrefix(text, "isb") && hasPrefix(dec.text, "isb") {
+		return true
+	}
+	// word F57FF053. we say 'dmb #3', libopcodes says 'dmb osh'.
+	if hasPrefix(text, "dmb") && hasPrefix(dec.text, "dmb") {
+		return true
+	}
+
+	// word 992D0000. push/stmdb with no registers (undefined).
+	// we say 'stmdbls sp!, {}', libopcodes says 'pushls {}'.
+	if hasPrefix(text, "stmdb") && hasPrefix(dec.text, "push") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") {
+		return true
+	}
+
+	// word 28BD0000. pop/ldm with no registers (undefined).
+	// we say 'ldmcs sp!, {}', libopcodes says 'popcs {}'.
+	if hasPrefix(text, "ldm") && hasPrefix(dec.text, "pop") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") {
+		return true
+	}
+
+	// word 014640F0.
+	// libopcodes emits #-0 for negative zero; we don't.
+	if strings.Replace(dec.text, "#-0", "#0", -1) == text || strings.Replace(dec.text, ", #-0", "", -1) == text {
+		return true
+	}
+
+	// word 91EF90F0. we say 'strdls r9, [pc, #0]!' but libopcodes says 'strdls r9, [pc]'.
+	// word D16F60F0. we say 'strdle r6, [pc, #0]!' but libopcodes says 'strdle r6, [pc, #-0]'.
+	if strings.Replace(text, ", #0]!", "]", -1) == strings.Replace(dec.text, ", #-0]", "]", -1) {
+		return true
+	}
+
+	// word 510F4000. we say apsr, libopcodes says CPSR.
+	if strings.Replace(dec.text, "CPSR", "apsr", -1) == text {
+		return true
+	}
+
+	// word 06A4B059.
+	// for ssat and usat, libopcodes decodes asr #0 as asr #0 but the manual seems to say it should be asr #32.
+	// There is never an asr #0.
+	if strings.Replace(dec.text, ", asr #0", ", asr #32", -1) == text {
+		return true
+	}
+
+	// The remaining checks inspect the raw instruction word, so they
+	// need all four encoding bytes.
+	if len(dec.enc) >= 4 {
+		raw := binary.LittleEndian.Uint32(dec.enc[:4])
+
+		// word 21FFF0B5.
+		// the manual is clear that this is pre-indexed mode (with !) but libopcodes generates post-index (without !).
+		if raw&0x01200000 == 0x01200000 && strings.Replace(text, "!", "", -1) == dec.text {
+			return true
+		}
+
+		// word C100543E: libopcodes says tst, but no evidence for that.
+		if strings.HasPrefix(dec.text, "tst") && raw&0x0ff00000 != 0x03100000 && raw&0x0ff00000 != 0x01100000 {
+			return true
+		}
+
+		// word C3203CE8: libopcodes says teq, but no evidence for that.
+		if strings.HasPrefix(dec.text, "teq") && raw&0x0ff00000 != 0x03300000 && raw&0x0ff00000 != 0x01300000 {
+			return true
+		}
+
+		// word D14C552E: libopcodes says cmp but no evidence for that.
+		if strings.HasPrefix(dec.text, "cmp") && raw&0x0ff00000 != 0x03500000 && raw&0x0ff00000 != 0x01500000 {
+			return true
+		}
+
+		// word 2166AA4A: libopcodes says cmn but no evidence for that.
+		if strings.HasPrefix(dec.text, "cmn") && raw&0x0ff00000 != 0x03700000 && raw&0x0ff00000 != 0x01700000 {
+			return true
+		}
+
+		// word E70AEEEF: libopcodes says str but no evidence for that.
+		// dec.text[5] is only examined when the text has at least six
+		// bytes; indexing it on a five-byte text would panic.
+		// NOTE(review): 0x0400000 has only seven hex digits; 0x04000000
+		// may have been intended. Left unchanged pending confirmation.
+		if strings.HasPrefix(dec.text, "str") && len(dec.text) >= 5 && (dec.text[3] == ' ' || len(dec.text) >= 6 && dec.text[5] == ' ') && raw&0x0e500018 != 0x06000000 && raw&0x0e500000 != 0x0400000 {
+			return true
+		}
+
+		// word B0AF48F4: libopcodes says strd but P=0,W=1 which is unpredictable.
+		if hasPrefix(dec.text, "ldr", "str") && raw&0x01200000 == 0x00200000 {
+			return true
+		}
+
+		// word B6CC1C76: libopcodes inexplicably says 'uxtab16lt r1, ip, r6, ROR #24' instead of 'uxtab16lt r1, ip, r6, ror #24'
+		if strings.ToLower(dec.text) == text {
+			return true
+		}
+
+		// word F410FDA1: libopcodes says PLDW but the manual is clear that PLDW is F5/F7, not F4.
+		// word F7D0FB17: libopcodes says PLDW but the manual is clear that PLDW has 0x10 clear
+		if hasPrefix(dec.text, "pld") && raw&0xfd000010 != 0xf5000000 {
+			return true
+		}
+
+		// word F650FE14: libopcodes says PLI but the manual is clear that PLI has 0x10 clear
+		if hasPrefix(dec.text, "pli") && raw&0xff000010 != 0xf6000000 {
+			return true
+		}
+	}
+
+	return false
+}
+
+// unsupported lists mnemonic prefixes of instructions known to
+// libopcodes (or xed) but not to us.
+// Most of these are floating point coprocessor instructions.
+// allowedMismatchObjdump passes the list to hasPrefix as a set of
+// prefixes, so each entry appears once.
+var unsupported = strings.Fields(`
+	abs
+	acs
+	adf
+	aes
+	asn
+	atn
+	cdp
+	cf
+	cmf
+	cnf
+	cos
+	cps
+	crc32
+	dvf
+	eret
+	exp
+	fadd
+	fcmp
+	fcpy
+	fcvt
+	fdiv
+	fdv
+	fix
+	fld
+	flt
+	fmac
+	fmd
+	fml
+	fmr
+	fms
+	fmul
+	fmx
+	fneg
+	fnm
+	frd
+	fsit
+	fsq
+	fst
+	fsu
+	fto
+	fui
+	hlt
+	hvc
+	lda
+	ldc
+	ldf
+	lfm
+	lgn
+	log
+	mar
+	mcr
+	mcrr
+	mia
+	mnf
+	mra
+	mrc
+	mrrc
+	mrs
+	msr
+	muf
+	mvf
+	nrm
+	pol
+	pow
+	rdf
+	rfc
+	rfe
+	rfs
+	rmf
+	rnd
+	rpw
+	rsf
+	sdiv
+	sev
+	sfm
+	sha1
+	sha256
+	sin
+	smc
+	sqt
+	srs
+	stc
+	stf
+	stl
+	suf
+	tan
+	udf
+	udiv
+	urd
+	vfma
+	vfms
+	vfnma
+	vfnms
+	vrint
+	wfc
+	wfs
+`)
--- a/src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go
@ -0,0 +1,260 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Copied and simplified from rsc.io/x86/x86asm/objdumpext_test.go.
+
+package armasm
+
+import (
+	"bytes"
+	"debug/elf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// objdumpPath is the fixed location of the external objdump binary that
+// the objdump comparison tests check for and execute.
+const objdumpPath = "/usr/local/bin/arm-linux-elf-objdump"
+
+// testObjdumpARM runs the objdump comparison on the generated cases in ModeARM.
+func testObjdumpARM(t *testing.T, generate func(func([]byte))) {
+	testObjdumpArch(t, generate, ModeARM)
+}
+
+// testObjdumpArch hands the generated cases to testExtDis together with
+// the objdump driver and allowedMismatchObjdump, for the given mode.
+// The test is skipped in short mode and also when the external objdump
+// binary at objdumpPath is not present: the binary is an optional
+// dependency of the machine running the tests, so its absence is not a
+// failure of the code under test.
+func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch Mode) {
+	if testing.Short() {
+		t.Skip("skipping objdump test in short mode")
+	}
+
+	if _, err := os.Stat(objdumpPath); err != nil {
+		t.Skipf("skipping objdump test: %v", err)
+	}
+
+	testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump)
+}
+
+// objdump runs the external objdump program on ext.File and sends one
+// ExtInst on ext.Dec for each instruction it reports, in address order
+// starting at start. It returns an error if objdump cannot be run, if its
+// output skips ahead of the expected address, or if the output does not
+// cover exactly ext.Size bytes.
+func objdump(ext *ExtDis) error {
+	// File already written with instructions; add ELF header.
+	if ext.Arch == ModeARM {
+		if err := writeELF32(ext.File, ext.Size); err != nil {
+			return err
+		}
+	} else {
+		panic("unknown arch")
+	}
+
+	b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name())
+	if err != nil {
+		return err
+	}
+
+	var (
+		nmatch  int
+		reading bool            // set once the <.text>: header has been seen
+		next    uint32 = start  // address of the next instruction to emit
+		addr    uint32          // address of the pending instruction
+		encbuf  [4]byte         // storage for the pending encoding bytes
+		enc     []byte          // pending encoding
+		text    string          // pending disassembly text
+	)
+	// flush emits the pending instruction if it sits at the expected
+	// address, after normalizing objdump's text toward our own syntax.
+	flush := func() {
+		if addr == next {
+			// Rewrite an absolute branch target as an offset relative to
+			// the end of the instruction.
+			// NOTE(review): pcrel is defined outside this chunk; m[1] is
+			// presumably the mnemonic and m[2] the hex target.
+			if m := pcrel.FindStringSubmatch(text); m != nil {
+				targ, _ := strconv.ParseUint(m[2], 16, 64)
+				text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+			}
+			// Map objdump's alternate LDM/STM mnemonics onto ours.
+			if strings.HasPrefix(text, "stmia") {
+				text = "stm" + text[5:]
+			}
+			if strings.HasPrefix(text, "stmfd") {
+				text = "stmdb" + text[5:]
+			}
+			if strings.HasPrefix(text, "ldmfd") {
+				text = "ldm" + text[5:]
+			}
+			text = strings.Replace(text, "#0.0", "#0", -1)
+			// An undefined word is reported as an error with length 0.
+			if text == "undefined" && len(enc) == 4 {
+				text = "error: unknown instruction"
+				enc = nil
+			}
+			if len(enc) == 4 {
+				// prints as word but we want to record bytes
+				enc[0], enc[3] = enc[3], enc[0]
+				enc[1], enc[2] = enc[2], enc[1]
+			}
+			ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+			encbuf = [4]byte{}
+			enc = nil
+			next += 4
+		}
+	}
+	var textangle = []byte("<.text>:")
+	for {
+		line, err := b.ReadSlice('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("reading objdump output: %v", err)
+		}
+		// Ignore everything up to and including the section header line.
+		if bytes.Contains(line, textangle) {
+			reading = true
+			continue
+		}
+		if !reading {
+			continue
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		// A continuation line adds bytes to the pending encoding.
+		if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil {
+			enc = enc1
+			continue
+		}
+		// Otherwise the line starts a new instruction: emit the pending
+		// one and parse this one.
+		flush()
+		nmatch++
+		addr, enc, text = parseLine(line, encbuf[:0])
+		if addr > next {
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
+		}
+	}
+	// Emit the final pending instruction.
+	flush()
+	if next != start+uint32(ext.Size) {
+		return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+
+	return nil
+}
+
+// Markers that parseLine looks for in a line of objdump output.
+var (
+	undefined      = []byte("<UNDEFINED>")
+	unpredictable  = []byte("<UNPREDICTABLE>") // only referenced from a disabled branch in parseLine
+	illegalShifter = []byte("<illegal shifter operand>")
+)
+
+func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	oline := line
+	i := index(line, ":\t")
+	if i < 0 {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32)
+	if err != nil {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	addr = uint32(x)
+	line = line[i+2:]
+	i = bytes.IndexByte(line, '\t')
+	if i < 0 {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	enc, ok := parseHex(line[:i], encstart)
+	if !ok {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	line = trimSpace(line[i:])
+	if bytes.Contains(line, undefined) {
+		text = "undefined"
+		return
+	}
+	if bytes.Contains(line, illegalShifter) {
+		text = "undefined"
+		return
+	}
+	if false && bytes.Contains(line, unpredictable) {
+		text = "unpredictable"
+		return
+	}
+	if i := bytes.IndexByte(line, ';'); i >= 0 {
+		line = trimSpace(line[:i])
+	}
+	text = string(fixSpace(line))
+	return
+}
+
+// parseContinuation treats line as a possible continuation of the previous
+// instruction's encoding. It returns nil if line has no ":\t" separator;
+// otherwise it returns the first result of parseHex applied to the text
+// after the colon, with enc as the bytes collected so far.
+func parseContinuation(line []byte, enc []byte) []byte {
+	colon := index(line, ":\t")
+	if colon < 0 {
+		return nil
+	}
+	more, _ := parseHex(line[colon+1:], enc)
+	return more
+}
+
+// writeELF32 writes an ELF32 header to the file,
+// describing a text segment that starts at start
+// and extends for size bytes.
+// The header is written at the beginning of f and consists of the ELF
+// header, one program header, three section headers (NULL, .text,
+// .shstrtab) and the section name string table.
+// It returns any error from seeking or writing f.
+func writeELF32(f *os.File, size int) error {
+	if _, err := f.Seek(0, 0); err != nil {
+		return err
+	}
+	var hdr elf.Header32
+	var prog elf.Prog32
+	var sect elf.Section32
+	var buf bytes.Buffer
+	// Encode zero-valued structures once to learn the encoded sizes,
+	// which give the file offsets used in the real header below.
+	// Writes of fixed-size values into a bytes.Buffer are not expected
+	// to fail, so the binary.Write results are not checked.
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	off1 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	off2 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	off3 := buf.Len()
+	buf.Reset()
+	data := byte(elf.ELFDATA2LSB)
+	hdr = elf.Header32{
+		Ident:     [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1},
+		Type:      2,
+		Machine:   uint16(elf.EM_ARM),
+		Version:   1,
+		Entry:     start,
+		Phoff:     uint32(off1),
+		Shoff:     uint32(off2),
+		Flags:     0x05000002,
+		Ehsize:    uint16(off1),
+		Phentsize: uint16(off2 - off1),
+		Phnum:     1,
+		Shentsize: uint16(off3 - off2),
+		Shnum:     3,
+		Shstrndx:  2,
+	}
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	prog = elf.Prog32{
+		Type:   1,
+		Off:    start,
+		Vaddr:  start,
+		Paddr:  start,
+		Filesz: uint32(size),
+		Memsz:  uint32(size),
+		Flags:  5,
+		Align:  start,
+	}
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
+	sect = elf.Section32{
+		Name:      1,
+		Type:      uint32(elf.SHT_PROGBITS),
+		Addr:      start,
+		Off:       start,
+		Size:      uint32(size),
+		Flags:     uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
+		Addralign: 4,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .text
+	sect = elf.Section32{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint32(off2 + (off3-off2)*3),
+		Size:      uint32(len("\x00.text\x00.shstrtab\x00")),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .shstrtab
+	buf.WriteString("\x00.text\x00.shstrtab\x00")
+	_, err := f.Write(buf.Bytes())
+	return err
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/plan9x.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/plan9x.go
@ -0,0 +1,211 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package armasm
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"strings"
+)
+
+// Plan9Syntax returns the Go assembler syntax for the instruction.
+// The syntax was originally defined by Plan 9.
+// The pc is the program counter of the instruction, used for expanding
+// PC-relative addresses into absolute ones.
+// The symname function queries the symbol table for the program
+// being disassembled. Given a target address it returns the name and base
+// address of the symbol containing the target, if any; otherwise it returns "", 0.
+// A nil symname is treated as a function that never finds a symbol.
+// The reader r should read from the text segment using text addresses
+// as offsets; it is used to display pc-relative loads as constant loads.
+func Plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string {
+	if symname == nil {
+		symname = func(uint64) (string, uint64) { return "", 0 }
+	}
+
+	var args []string
+	for _, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		args = append(args, plan9Arg(&inst, pc, symname, a))
+	}
+
+	op := inst.Op.String()
+
+	switch inst.Op &^ 15 {
+	case LDR_EQ, LDRB_EQ, LDRH_EQ:
+		// Check for RET
+		reg, _ := inst.Args[0].(Reg)
+		mem, _ := inst.Args[1].(Mem)
+		if inst.Op&^15 == LDR_EQ && reg == R15 && mem.Base == SP && mem.Sign == 0 && mem.Mode == AddrPostIndex {
+			return fmt.Sprintf("RET%s #%d", op[3:], mem.Offset)
+		}
+
+		// Check for PC-relative load.
+		// NOTE(review): the args[1] value computed here is replaced by
+		// the addressing-mode rewrite below, which assigns args[1] for
+		// the same opcodes; confirm which rendering is intended.
+		if mem.Base == PC && mem.Sign == 0 && mem.Mode == AddrOffset && text != nil {
+			addr := uint32(pc) + 8 + uint32(mem.Offset)
+			buf := make([]byte, 4)
+			switch inst.Op &^ 15 {
+			case LDRB_EQ:
+				if _, err := text.ReadAt(buf[:1], int64(addr)); err != nil {
+					break
+				}
+				args[1] = fmt.Sprintf("$%#x", buf[0])
+
+			case LDRH_EQ:
+				if _, err := text.ReadAt(buf[:2], int64(addr)); err != nil {
+					break
+				}
+				args[1] = fmt.Sprintf("$%#x", binary.LittleEndian.Uint16(buf))
+
+			case LDR_EQ:
+				if _, err := text.ReadAt(buf, int64(addr)); err != nil {
+					break
+				}
+				x := binary.LittleEndian.Uint32(buf)
+				if s, base := symname(uint64(x)); s != "" && uint64(x) == base {
+					args[1] = fmt.Sprintf("$%s(SB)", s)
+				} else {
+					args[1] = fmt.Sprintf("$%#x", x)
+				}
+			}
+		}
+	}
+
+	// Move addressing mode into opcode suffix.
+	suffix := ""
+	switch inst.Op &^ 15 {
+	case LDR_EQ, LDRB_EQ, LDRH_EQ, STR_EQ, STRB_EQ, STRH_EQ:
+		mem, _ := inst.Args[1].(Mem)
+		switch mem.Mode {
+		case AddrOffset, AddrLDM:
+			// no suffix
+		case AddrPreIndex, AddrLDM_WB:
+			suffix = ".W"
+		case AddrPostIndex:
+			suffix = ".P"
+		}
+		off := ""
+		if mem.Offset != 0 {
+			off = fmt.Sprintf("%#x", mem.Offset)
+		}
+		base := fmt.Sprintf("(R%d)", int(mem.Base))
+		index := ""
+		if mem.Sign != 0 {
+			// A negative Sign means the index register is subtracted;
+			// mark it so it is distinguishable from an added index.
+			sign := ""
+			if mem.Sign < 0 {
+				sign = "-"
+			}
+			shift := ""
+			if mem.Count != 0 {
+				shift = fmt.Sprintf("%s%d", plan9Shift[mem.Shift], mem.Count)
+			}
+			index = fmt.Sprintf("(%sR%d%s)", sign, int(mem.Index), shift)
+		}
+		args[1] = off + base + index
+	}
+
+	// Reverse args, placing dest last.
+	for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
+		args[i], args[j] = args[j], args[i]
+	}
+
+	// Rename moves, loads and stores to the MOVx family. op[3:] and
+	// op[4:] keep whatever follows the three- or four-letter base name.
+	// Stores swap the two operands back so the memory operand is last.
+	switch inst.Op &^ 15 {
+	case MOV_EQ:
+		op = "MOVW" + op[3:]
+
+	case LDR_EQ:
+		op = "MOVW" + op[3:] + suffix
+	case LDRB_EQ:
+		op = "MOVB" + op[4:] + suffix
+	case LDRH_EQ:
+		op = "MOVH" + op[4:] + suffix
+
+	case STR_EQ:
+		op = "MOVW" + op[3:] + suffix
+		args[0], args[1] = args[1], args[0]
+	case STRB_EQ:
+		op = "MOVB" + op[4:] + suffix
+		args[0], args[1] = args[1], args[0]
+	case STRH_EQ:
+		op = "MOVH" + op[4:] + suffix
+		args[0], args[1] = args[1], args[0]
+	}
+
+	if args != nil {
+		op += " " + strings.Join(args, ", ")
+	}
+
+	return op
+}
+
+// plan9Shift holds the assembler syntax for the various shifts,
+// indexed by Shift value.
+// @x> is a lie; the assembler uses @> 0
+// instead of @x> 1, but I wanted to be clear that it
+// was a different operation (rotate right extended, not rotate right).
+var plan9Shift = []string{"<<", ">>", "->", "@>", "@x>"}
+
+// plan9Arg formats a single instruction argument in Go assembler syntax.
+// pc and symname have the same meaning as in Plan9Syntax and are used to
+// turn PC-relative arguments into symbol names or absolute addresses.
+// Argument kinds with no special handling here are printed as the
+// upper-cased result of their String method.
+func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
+	switch a := arg.(type) {
+	case Endian:
+
+	case Imm:
+		return fmt.Sprintf("$%d", int(a))
+
+	case Mem:
+
+	case PCRel:
+		addr := uint32(pc) + 8 + uint32(a)
+		if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base {
+			return fmt.Sprintf("%s(SB)", s)
+		}
+		return fmt.Sprintf("%#x", addr)
+
+	case Reg:
+		if a < 16 {
+			return fmt.Sprintf("R%d", int(a))
+		}
+
+	case RegList:
+		// Print the set as comma-separated runs, e.g. [R0-R3,R5,R14].
+		// start and end delimit the run being accumulated; -2 means no
+		// run is open (and can never equal i-1 for a valid i).
+		var buf bytes.Buffer
+		start := -2
+		end := -2
+		fmt.Fprintf(&buf, "[")
+		// flush writes the open run, if any, and closes it.
+		flush := func() {
+			if start >= 0 {
+				if buf.Len() > 1 {
+					fmt.Fprintf(&buf, ",")
+				}
+				if start == end {
+					fmt.Fprintf(&buf, "R%d", start)
+				} else {
+					fmt.Fprintf(&buf, "R%d-R%d", start, end)
+				}
+				start = -2
+				end = -2
+			}
+		}
+		for i := 0; i < 16; i++ {
+			if a&(1<<uint(i)) != 0 {
+				if i == end+1 {
+					end++
+					continue
+				}
+				start = i
+				end = i
+			} else {
+				// A gap ends the current run; without this flush every
+				// run except the last one would be lost.
+				flush()
+			}
+		}
+		flush()
+		fmt.Fprintf(&buf, "]")
+		return buf.String()
+
+	case RegShift:
+		return fmt.Sprintf("R%d%s$%d", int(a.Reg), plan9Shift[a.Shift], int(a.Count))
+
+	case RegShiftReg:
+		return fmt.Sprintf("R%d%sR%d", int(a.Reg), plan9Shift[a.Shift], int(a.RegCount))
+	}
+	return strings.ToUpper(arg.String())
+}
--- a/src/cmd/internal/rsc.io/arm/armasm/tables.go
+++ b/src/cmd/internal/rsc.io/arm/armasm/tables.go
--- a/src/cmd/internal/rsc.io/arm/armasm/testdata/Makefile
+++ b/src/cmd/internal/rsc.io/arm/armasm/testdata/Makefile
@ -0,0 +1,5 @@
+# newdecode.txt is built by running the ObjdumpARMCond and ObjdumpARMUncond
+# tests from the parent directory with -printtests -long, collecting their
+# output in ../log, and then keeping and sorting the lines that contain a
+# tab-delimited "gnu" or "plan9" field.
+newdecode.txt:
+	cd ..; go test -cover -run 'ObjdumpARMCond' -v -timeout 10h -printtests -long 2>&1 | tee log
+	cd ..; go test -cover -run 'ObjdumpARMUncond' -v -timeout 10h -printtests -long 2>&1 | tee -a log
+	egrep '	(gnu|plan9)	' ../log |sort >newdecode.txt
+
--- a/src/cmd/internal/rsc.io/arm/armasm/testdata/decode.txt
+++ b/src/cmd/internal/rsc.io/arm/armasm/testdata/decode.txt
@ -0,0 +1,306 @@
+000001f1|	1	gnu	setend le
+00100f61|	1	gnu	mrsvs r1, apsr
+00f02053|	1	gnu	noppl
+00f0d4f4|	1	gnu	pli [r4]
+01f020d3|	1	gnu	yieldle
+02002d59|	1	gnu	stmdbpl sp!, {r1}
+021da9d8|	1	gnu	stmle r9!, {r1, r8, sl, fp, ip}
+02c0b071|	1	gnu	movsvc ip, r2
+02f02073|	1	gnu	wfevc
+03f02013|	1	gnu	wfine
+03f05df7|	1	gnu	pld [sp, -r3]
+04009d34|	1	gnu	popcc {r0}
+043a52b1|	1	gnu	cmplt r2, r4, lsl #20
+04402de5|	1	gnu	push {r4}
+045b148d|	1	gnu	vldrhi d5, [r4, #-16]
+04f02093|	1	gnu	sevls
+0793eab0|	1	gnu	rsclt r9, sl, r7, lsl #6
+079bfb9e|	1	gnu	vmovls.f64 d25, #183
+0a4fc9d3|	1	gnu	bicle r4, r9, #10, 30
+0bac7ab6|	1	gnu	ldrbtlt sl, [sl], -fp, lsl #24
+0c2aee44|	1	gnu	strbtmi r2, [lr], #2572
+0c4bb000|	1	gnu	adcseq r4, r0, ip, lsl #22
+0e26d561|	1	gnu	bicsvs r2, r5, lr, lsl #12
+0f0fa011|	1	gnu	lslne r0, pc, #30
+0fa448e0|	1	gnu	sub sl, r8, pc, lsl #8
+101af1de|	1	gnu	vmrsle r1, fpscr
+108a0cee|	1	gnu	vmov s24, r8
+108a1dae|	1	gnu	vmovge r8, s26
+108ae14e|	1	gnu	vmsrmi fpscr, r8
+10faf1ae|	1	gnu	vmrsge apsr_nzcv, fpscr
+10fb052e|	1	gnu	vmovcs.32 d5[0], pc
+11c902b7|	1	gnu	smladlt r2, r1, r9, ip
+11ef5b16|	1	gnu	uadd16ne lr, fp, r1
+12fa87a7|	1	gnu	usad8ge r7, r2, sl
+135f2956|	1	gnu	qadd16pl r5, r9, r3
+13de9aa1|	1	gnu	orrsge sp, sl, r3, lsl lr
+145c0e40|	1	gnu	andmi r5, lr, r4, lsl ip
+150f7fd6|	1	gnu	uhadd16le r0, pc, r5
+15b9bf12|	1	gnu	adcsne fp, pc, #344064
+16373391|	1	gnu	teqls r3, r6, lsl r7
+19ef1966|	1	gnu	sadd16vs lr, r9, r9
+1ab0b091|	1	gnu	lslsls fp, sl, r0
+1b9f6fe6|	1	gnu	uqadd16 r9, pc, fp
+1bb58557|	1	gnu	usada8pl r5, fp, r5, fp
+1beff8e0|	1	gnu	rscs lr, r8, fp, lsl pc
+1caff0e6|	1	gnu	usat sl, #16, ip, lsl #30
+1d0f3d36|	1	gnu	shadd16cc r0, sp, sp
+1dca1d52|	1	gnu	andspl ip, sp, #118784
+1e4891d0|	1	gnu	addsle r4, r1, lr, lsl r8
+1f0889e6|	1	gnu	pkhbt r0, r9, pc, lsl #16
+1f1f6fe1|	1	gnu	clz r1, pc
+1f26d157|	1	gnu	bfcpl r2, #12, #6
+1ff07ff5|	1	gnu	clrex
+1fff2fd1|	1	gnu	bxle pc
+20f153f6|	1	gnu	pli [r3, -r0, lsr #2]
+21047013|	1	gnu	cmnne r0, #553648128
+21c2eb8b|	1	gnu	blhi .-0x50f778
+21c2ebfb|	1	gnu	blx .-0x50f776
+21fa62ee|	1	gnu	vmul.f32 s31, s4, s3
+23005720|	1	gnu	subscs r0, r7, r3, lsr #32
+236a303e|	1	gnu	vaddcc.f32 s12, s0, s7
+23f055f6|	1	gnu	pli [r5, -r3, lsr #32]
+2430a031|	1	gnu	lsrcc r3, r4, #32
+245d0803|	1	gnu	movweq r5, #36132
+251a86be|	1	gnu	vdivlt.f32 s2, s12, s11
+25db7b81|	1	gnu	cmnhi fp, r5, lsr #22
+26bc3553|	1	gnu	teqpl r5, #9728
+277c2d69|	1	gnu	pushvs {r0, r1, r2, r5, sl, fp, ip, sp, lr}
+29fc1cf5|	1	gnu	pldw [ip, #-3113]
+29ff2fc1|	1	gnu	bxjgt r9
+2decd9c0|	1	gnu	sbcsgt lr, r9, sp, lsr #24
+30fa5e47|	1	gnu	smmulrmi lr, r0, sl
+316f64d6|	1	gnu	uqasxle r6, r4, r1
+323f5da6|	1	gnu	uasxge r3, sp, r2
+327fe5e6|	1	gnu	usat16 r7, #5, r2
+330151e3|	1	gnu	cmp r1, #-1073741812
+34af2ae6|	1	gnu	qasx sl, sl, r4
+35fd3710|	1	gnu	eorsne pc, r7, r5, lsr sp
+36def1c1|	1	gnu	mvnsgt sp, r6, lsr lr
+3801b061|	1	gnu	lsrsvs r0, r8, r1
+38985477|	1	gnu	smmlarvc r4, r8, r8, r9
+3a2fbfa6|	1	gnu	revge r2, sl
+3a3f1b06|	1	gnu	sasxeq r3, fp, sl
+3a7fa346|	1	gnu	ssat16mi r7, #4, sl
+3a943b94|	1	gnu	ldrtls r9, [fp], #-1082
+3bf505e7|	1	gnu	smuadx r5, fp, r5
+3cef7086|	1	gnu	uhasxhi lr, r0, ip
+3e5f3ec6|	1	gnu	shasxgt r5, lr, lr
+3f4fff86|	1	gnu	rbithi r4, pc
+3faf4717|	1	gnu	smlaldxne sl, r7, pc, pc
+3fff2fc1|	1	gnu	blxgt pc
+402bbf7e|	1	gnu	vcvtvc.u16.f64 d2, d2, #16
+403ab5de|	1	gnu	vcmple.f32 s6, #0
+40eb363e|	1	gnu	vsubcc.f64 d14, d6, d0
+420f73d1|	1	gnu	cmnle r3, r2, asr #30
+424a648e|	1	gnu	vnmulhi.f32 s9, s8, s4
+4284d717|	1	gnu	ldrbne r8, [r7, r2, asr #8]
+42a599c3|	1	gnu	orrsgt sl, r9, #276824064
+42abf0be|	1	gnu	vmovlt.f64 d26, d2
+446ea031|	1	gnu	asrcc r6, r4, #28
+4a953557|	1	gnu	ldrpl r9, [r5, -sl, asr #10]!
+4ab6f712|	1	gnu	rscsne fp, r7, #77594624
+4af07ff5|	1	gnu	dsb #10
+4df6def4|	1	gnu	pli [lr, #1613]
+4efbf52e|	1	gnu	vcmpcs.f64 d31, #0
+50aaac79|	1	gnu	stmibvc ip!, {r4, r6, r9, fp, sp, pc}
+50caf011|	1	gnu	mvnsne ip, r0, asr sl
+50f04961|	1	gnu	qdaddvs pc, r0, r9
+51282008|	1	gnu	stmdaeq r0!, {r0, r4, r6, fp, sp}
+52bf6576|	1	gnu	uqsaxvc fp, r5, r2
+5345c9d0|	1	gnu	sbcle r4, r9, r3, asr r5
+538f5e46|	1	gnu	usaxmi r8, lr, r3
+54106d31|	1	gnu	qdsubcc r1, r4, sp
+56e0e557|	1	gnu	ubfxpl lr, r6, #0, #6
+57073d11|	1	gnu	teqne sp, r7, asr r7
+58bb0aa9|	1	gnu	stmdbge sl, {r3, r4, r6, r8, r9, fp, ip, sp, pc}
+58f007b1|	1	gnu	qaddlt pc, r8, r7
+59fd0e77|	1	gnu	smusdvc lr, r9, sp
+5ab7f1c5|	1	gnu	ldrbgt fp, [r1, #1882]!
+5abf23c6|	1	gnu	qsaxgt fp, r3, sl
+5b8f1c96|	1	gnu	ssaxls r8, ip, fp
+5b98ab97|	1	gnu	sbfxls r9, fp, #16, #12
+5bc9b041|	1	gnu	asrsmi ip, fp, r9
+5bf07ff5|	1	gnu	dmb #11
+5c102b81|	1	gnu	qsubhi r1, ip, fp
+5caa49e1|	1	gnu	qdadd sl, ip, r9
+5d3f7226|	1	gnu	uhsaxcs r3, r2, sp
+5db55470|	1	gnu	subsvc fp, r4, sp, asr r5
+5ef14387|	1	gnu	smlsldhi pc, r3, lr, r1
+5f540a11|	1	gnu	qaddne r5, pc, sl
+5f9079d1|	1	gnu	cmnle r9, pc, asr r0
+5faf3f66|	1	gnu	shsaxvs sl, pc, pc
+605071d7|	1	gnu	ldrble r5, [r1, -r0, rrx]!
+614adc76|	1	gnu	ldrbvc r4, [ip], r1, ror #20
+616b9e42|	1	gnu	addsmi r6, lr, #99328
+62c84f15|	1	gnu	strbne ip, [pc, #-2146]
+62f051f7|	1	gnu	pld [r1, -r2, rrx]
+6346c393|	1	gnu	bicls r4, r3, #103809024
+654abbae|	1	gnu	vcvtge.f32.u16 s8, s8, #5
+65a5f0e3|	1	gnu	mvns sl, #423624704
+65f796f7|	1	gnu	pldw [r6, r5, ror #14]
+670bb12e|	1	gnu	vnegcs.f64 d0, d23
+67903731|	1	gnu	teqcc r7, r7, rrx
+68ddc637|	1	gnu	strbcc sp, [r6, r8, ror #26]
+695b3ab6|	1	gnu	ldrtlt r5, [sl], -r9, ror #22
+697cfc71|	1	gnu	mvnsvc r7, r9, ror #24
+6a0ab3ee|	1	gnu	vcvtb.f16.f32 s0, s21
+6ad9ad54|	1	gnu	strtpl sp, [sp], #2410
+6af07ff5|	1	gnu	isb #10
+6afa6f10|	1	gnu	rsbne pc, pc, sl, ror #20
+6d5b19ee|	1	gnu	vnmla.f64 d5, d9, d29
+6d60b071|	1	gnu	rrxsvc r6, sp
+6df754f7|	1	gnu	pld [r4, -sp, ror #14]
+70065821|	1	gnu	cmpcs r8, r0, ror r6
+7050ed86|	1	gnu	uxtabhi r5, sp, r0
+715f1186|	1	gnu	ssub16hi r5, r1, r1
+716c9805|	1	gnu	ldreq r6, [r8, #3185]
+718d5ab1|	1	gnu	cmplt sl, r1, ror sp
+71c8cfb6|	1	gnu	uxtb16lt ip, r1, ror #16
+7294af06|	1	gnu	sxtbeq r9, r2, ror #8
+72c0bac6|	1	gnu	sxtahgt ip, sl, r2
+730f6716|	1	gnu	uqsub16ne r0, r7, r3
+73608f46|	1	gnu	sxtb16mi r6, r3
+73687f22|	1	gnu	rsbscs r6, pc, #7536640
+74308816|	1	gnu	sxtab16ne r3, r8, r4
+757f3456|	1	gnu	shsub16pl r7, r4, r5
+77788016|	1	gnu	sxtab16ne r7, r0, r7, ror #16
+78061671|	1	gnu	tstvc r6, r8, ror r6
+780a2fe1|	1	gnu	bkpt 0xf0a8
+7850abd6|	1	gnu	sxtable r5, fp, r8
+792cef26|	1	gnu	uxtbcs r2, r9, ror #24
+799eb8e0|	1	gnu	adcs r9, r8, r9, ror lr
+799f5726|	1	gnu	usub16cs r9, r7, r9
+79d0bf16|	1	gnu	sxthne sp, r9
+7a037ba1|	1	gnu	cmnge fp, sl, ror r3
+7b0f2566|	1	gnu	qsub16vs r0, r5, fp
+7b79dd51|	1	gnu	bicspl r7, sp, fp, ror r9
+7b9a9f1d|	1	gnu	vldrne s18, [pc, #492]
+7c70cea6|	1	gnu	uxtab16ge r7, lr, ip
+7d48f966|	1	gnu	uxtahvs r4, r9, sp, ror #16
+7d5c13a1|	1	gnu	tstge r3, sp, ror ip
+7e0001f1|	1	gnu	setend le
+7e1c0ba7|	1	gnu	smlsdxge fp, lr, ip, r1
+7e567e40|	1	gnu	rsbsmi r5, lr, lr, ror r6
+7e8f73b6|	1	gnu	uhsub16lt r8, r3, lr
+7ef0ffd6|	1	gnu	uxthle pc, lr
+7faaa011|	1	gnu	rorne sl, pc, sl
+81f19af7|	1	gnu	pldw [sl, r1, lsl #3]
+82033901|	1	gnu	teqeq r9, r2, lsl #7
+82f316f5|	1	gnu	pldw [r6, #-898]
+830201f1|	1	gnu	setend be
+838a3b91|	1	gnu	teqls fp, r3, lsl #21
+8408af2f|	1	gnu	svccs 0x00af0884
+884201d1|	1	gnu	smlabble r1, r8, r2, r4
+8aa12e31|	1	gnu	smlawbcc lr, sl, r1, sl
+8b9b99c0|	1	gnu	addsgt r9, r9, fp, lsl #23
+8c005c81|	1	gnu	cmphi ip, ip, lsl #1
+8fb429c6|	1	gnu	strtgt fp, [r9], -pc, lsl #9
+907b1f9e|	1	gnu	vmovls.32 r7, d31[0]
+91975f25|	1	gnu	ldrbcs r9, [pc, #-1937]
+91b010e3|	1	gnu	tst r0, #145
+927facb1|	1	gnu	strexdlt r7, r2, [ip]
+92904c91|	1	gnu	swpbls r9, r2, [ip]
+92af1226|	1	gnu	sadd8cs sl, r2, r2
+92b28c70|	1	gnu	umullvc fp, ip, r2, r2
+945f68a6|	1	gnu	uqadd8ge r5, r8, r4
+950b2560|	1	gnu	mlavs r5, r5, fp, r0
+969fcf71|	1	gnu	strexbvc r9, r6, [pc]
+96cf35e6|	1	gnu	shadd8 ip, r5, r6
+98060eb0|	1	gnu	mullt lr, r8, r6
+9843fb93|	1	gnu	mvnsls r4, #152, 6
+9a3fe2b0|	1	gnu	smlallt r3, r2, sl, pc
+9aef58b6|	1	gnu	uadd8lt lr, r8, sl
+9afcdff5|	1	gnu	pld [pc, #3226]
+9c221810|	1	gnu	mulsne r8, ip, r2
+9c3bc9dd|	1	gnu	vstrle d19, [r9, #624]
+9c5f2606|	1	gnu	qadd8eq r5, r6, ip
+9d87dac0|	1	gnu	smullsgt r8, sl, sp, r7
+9e0f7c86|	1	gnu	uhadd8hi r0, ip, lr
+9e814560|	1	gnu	umaalvs r8, r5, lr, r1
+9e9f8dc1|	1	gnu	strexgt r9, lr, [sp]
+9ec3c9d7|	1	gnu	bfile ip, lr, #7, #3
+9ed26d90|	1	gnu	mlsls sp, lr, r2, sp
+9f7fd9c1|	1	gnu	ldrexbgt r7, [r9]
+9f7fea91|	1	gnu	strexhls r7, pc, [sl]
+9f9f9921|	1	gnu	ldrexcs r9, [r9]
+9faffd21|	1	gnu	ldrexhcs sl, [sp]
+9fcfbd61|	1	gnu	ldrexdvs ip, [sp]
+9ff7a710|	1	gnu	umlalne pc, r7, pc, r7
+a05459d3|	1	gnu	cmple r9, #160, 8
+a3062be1|	1	gnu	smulwb fp, r3, r6
+a68a92b1|	1	gnu	orrslt r8, r2, r6, lsr #21
+abff55f6|	1	gnu	pli [r5, -fp, lsr #31]
+addbf8ea|	1	gnu	b .-0x1c9148
+ae79b021|	1	gnu	lsrscs r7, lr, #19
+b590a3b1|	1	gnu	strhlt r9, [r3, r5]!
+b5b2e390|	1	gnu	strhtls fp, [r3], #37
+b6ac4e30|	1	gnu	strhcc sl, [lr], #-198
+b73fff86|	1	gnu	revshhi r3, r7
+b75fbfc6|	1	gnu	rev16gt r5, r7
+b80b7c80|	1	gnu	ldrhthi r0, [ip], #-184
+b82035e0|	1	gnu	ldrht r2, [r5], -r8
+b8877391|	1	gnu	ldrhls r8, [r3, #-120]!
+b9703e41|	1	gnu	ldrhmi r7, [lr, -r9]!
+b9cf8c16|	1	gnu	selne ip, ip, r9
+bd81bd58|	1	gnu	poppl {r0, r2, r3, r4, r5, r7, r8, pc}
+bdfdb469|	1	gnu	ldmibvs r4!, {r0, r2, r3, r4, r5, r7, r8, sl, fp, ip, sp, lr, pc}
+beb02500|	1	gnu	strhteq fp, [r5], -lr
+bf1a5e42|	1	gnu	subsmi r1, lr, #782336
+c19a4d5e|	1	gnu	vmlspl.f32 s19, s27, s2
+c1aab15e|	1	gnu	vsqrtpl.f32 s20, s2
+c354b003|	1	gnu	movseq r5, #-1023410176
+c4091dc1|	1	gnu	tstgt sp, r4, asr #19
+c50e13a9|	1	gnu	ldmdbge r3, {r0, r2, r6, r7, r9, sl, fp}
+c68c8637|	1	gnu	strcc r8, [r6, r6, asr #25]
+c6ad48e3|	1	gnu	movt sl, #36294
+c6f65ff5|	1	gnu	pld [pc, #-1734]
+c8a92f10|	1	gnu	eorne sl, pc, r8, asr #19
+c9016b61|	1	gnu	smulbtvs fp, r9, r1
+cadbf49e|	1	gnu	vcmpels.f64 d29, d10
+ce9de476|	1	gnu	strbtvc r9, [r4], lr, asr #27
+cf3c1ab1|	1	gnu	tstlt sl, pc, asr #25
+d355aab6|	1	gnu	ssatlt r5, #11, r3, asr #11
+d4f4df10|	1	gnu	ldrsbne pc, [pc], #68
+d6530d61|	1	gnu	ldrdvs r5, [sp, -r6]
+d74d7800|	1	gnu	ldrsbteq r4, [r8], #-215
+d9703680|	1	gnu	ldrsbthi r7, [r6], -r9
+dbe003c0|	1	gnu	ldrdgt lr, [r3], -fp
+dc709561|	1	gnu	ldrsbvs r7, [r5, ip]
+dcc3b9c8|	1	gnu	ldmgt r9!, {r2, r3, r4, r6, r7, r8, r9, lr, pc}
+debfa0e5|	1	gnu	str fp, [r0, #4062]!
+dee062a1|	1	gnu	ldrdge lr, [r2, #-14]!
+dfa05ab7|	1	gnu	smmlslt sl, pc, r0, sl
+e02ef011|	1	gnu	mvnsne r2, r0, ror #29
+e4d41718|	1	gnu	ldmdane r7, {r2, r5, r6, r7, sl, ip, lr, pc}
+e6d0fe34|	1	gnu	ldrbtcc sp, [lr], #230
+e73bf7be|	1	gnu	vcvtlt.f32.f64 s7, d23
+e74e72b3|	1	gnu	cmnlt r2, #3696
+e80bf07e|	1	gnu	vabsvc.f64 d16, d24
+e9b5b001|	1	gnu	rorseq fp, r9, #11
+ea7bbdbe|	1	gnu	vcvtlt.s32.f64 s14, d26
+ec063813|	1	gnu	teqne r8, #236, 12
+ec0e49e1|	1	gnu	smlaltt r0, r9, ip, lr
+ee4ab85e|	1	gnu	vcvtpl.f32.s32 s8, s29
+ef461f25|	1	gnu	ldrcs r4, [pc, #-1775]
+ef5fd002|	1	gnu	sbcseq r5, r0, #956
+f4cf1d36|	1	gnu	ssub8cc ip, sp, r4
+f67f73b6|	1	gnu	uhsub8lt r7, r3, r6
+f6e09ca0|	1	gnu	ldrshge lr, [ip], r6
+f7702e32|	1	gnu	eorcc r7, lr, #247
+fa4dcf20|	1	gnu	strdcs r4, [pc], #218
+fac03720|	1	gnu	ldrshtcs ip, [r7], -sl
+fc0f64c6|	1	gnu	uqsub8gt r0, r4, ip
+fc28f481|	1	gnu	ldrshhi r2, [r4, #140]!
+fc300560|	1	gnu	strdvs r3, [r5], -ip
+fcacfc70|	1	gnu	ldrshtvc sl, [ip], #204
+fdbcfaf7|	1	gnu	undef
+fddf5c86|	1	gnu	usub8hi sp, ip, sp
+fdf02013|	1	gnu	dbgne #13
+fe0319e3|	1	gnu	tst r9, #-134217725
+fe7f3116|	1	gnu	shsub8ne r7, r1, lr
+ff4f2ac6|	1	gnu	qsub8gt r4, sl, pc
+ff818c71|	1	gnu	strdvc r8, [ip, pc]
+|6b5721d3	1	gnu	error: unknown instruction
+|76452001	1	gnu	error: unknown instruction
+|97acd647	1	gnu	error: unknown instruction
--- a/src/cmd/internal/rsc.io/x86/x86asm/Makefile
+++ b/src/cmd/internal/rsc.io/x86/x86asm/Makefile
@ -0,0 +1,3 @@
+# tables.go is generated: ../x86map/map.go is run with -fmt=decoder on
+# ../x86.csv, and its output is passed through gofmt. The intermediate
+# _tables.go is removed once the formatted file has been written.
+tables.go: ../x86map/map.go ../x86.csv 
+	go run ../x86map/map.go -fmt=decoder ../x86.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go
+
--- a/src/cmd/internal/rsc.io/x86/x86asm/decode.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/decode.go
--- a/src/cmd/internal/rsc.io/x86/x86asm/decode_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/decode_test.go
@ -0,0 +1,71 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"encoding/hex"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// TestDecode checks Decode and the three syntax printers against the cases
+// recorded in testdata/decode.txt.
+//
+// Each non-blank, non-comment line has four tab-separated fields (runs of
+// tabs count as one separator):
+//
+//	hex|hex	mode	syntax	expected text
+//
+// The '|' in the first field marks the end of the instruction, so the bytes
+// before it give the expected length and the bytes after it are trailing
+// input. mode is passed to Decode, and syntax is one of gnu, intel or plan9.
+// A decode failure is compared as "error: " followed by the error text.
+func TestDecode(t *testing.T) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	all := string(data)
+	// Collapse runs of tabs so that hand-aligned columns still split
+	// into exactly four fields.
+	for strings.Contains(all, "\t\t") {
+		all = strings.Replace(all, "\t\t", "\t", -1)
+	}
+	for _, line := range strings.Split(all, "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		f := strings.SplitN(line, "\t", 4)
+		if len(f) < 4 {
+			// Report the malformed line instead of panicking on f[1]..f[3].
+			t.Errorf("parsing %q: got %d tab-separated fields, want 4", line, len(f))
+			continue
+		}
+		i := strings.Index(f[0], "|")
+		if i < 0 {
+			t.Errorf("parsing %q: missing | separator", f[0])
+			continue
+		}
+		if i%2 != 0 {
+			// The expected size cannot be derived from a separator that
+			// splits a byte, so do not go on to report a bogus mismatch.
+			t.Errorf("parsing %q: misaligned | separator", f[0])
+			continue
+		}
+		size := i / 2
+		code, err := hex.DecodeString(f[0][:i] + f[0][i+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", f[0], err)
+			continue
+		}
+		mode, err := strconv.Atoi(f[1])
+		if err != nil {
+			t.Errorf("invalid mode %q in: %s", f[1], line)
+			continue
+		}
+		syntax, asm := f[2], f[3]
+		inst, err := Decode(code, mode)
+		var out string
+		if err != nil {
+			out = "error: " + err.Error()
+		} else {
+			switch syntax {
+			case "gnu":
+				out = GNUSyntax(inst)
+			case "intel":
+				out = IntelSyntax(inst)
+			case "plan9":
+				out = Plan9Syntax(inst, 0, nil)
+			default:
+				t.Errorf("unknown syntax %q", syntax)
+				continue
+			}
+		}
+		if out != asm || inst.Len != size {
+			t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size)
+		}
+	}
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/ext_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/ext_test.go
@ -0,0 +1,811 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Support for testing against external disassembler program.
+
+package x86asm
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"os"
+	"os/exec"
+	"regexp"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+)
+
+// Command-line flags and switches for the external-disassembler tests.
+//
+//   - printTests: disasm prints every input that increased coverage.
+//   - dumpTest: testExtDis prints every encoding with the external decoding.
+//   - mismatch: testExtDis also logs mismatches that allowedMismatch accepted.
+//   - longTest: enables the exhaustive phases of testBasic, testBasicREX,
+//     testPrefix and testPrefixREX.
+//   - keep: the temporary input file is not removed and ExtDis.Run logs the
+//     command line it runs.
+//   - debug: compile-time switch; writeInst prints each record it writes.
+var (
+	printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
+	dumpTest   = flag.Bool("dump", false, "dump all encodings")
+	mismatch   = flag.Bool("mismatch", false, "log allowed mismatches")
+	longTest   = flag.Bool("long", false, "long test")
+	keep       = flag.Bool("keep", false, "keep object files around")
+	debug      = false
+)
+
+// An ExtInst is one instruction as reported by an external disassembler:
+// the address it was found at, its encoding bytes (nenc of them are
+// meaningful), and the disassembled text.
+type ExtInst struct {
+	addr uint32
+	enc  [32]byte
+	nenc int
+	text string
+}
+
+// String formats r as "address: encoding bytes: text". The address is
+// printed in 0x-prefixed hex and the whole enc array as space-separated
+// hex bytes.
+func (r ExtInst) String() string {
+	where := fmt.Sprintf("%#x", r.addr)
+	encoding := fmt.Sprintf("% x", r.enc)
+	return where + ": " + encoding + ": " + r.text
+}
+
+// An ExtDis is a connection between an external disassembler and a test.
+// testExtDis creates one per run and hands it to the extdis callback.
+type ExtDis struct {
+	Arch     int          // architecture mode requested by the test
+	Dec      chan ExtInst // parsed instructions, sent by the extdis callback and received by testExtDis
+	File     *os.File     // input file holding the byte sequences to disassemble
+	Size     int          // size value returned by writeInst for File
+	KeepFile bool         // NOTE(review): not referenced in this part of the file
+	Cmd      *exec.Cmd    // command started by Run and waited for by Wait
+}
+
+// Run starts the external disassembler given by cmd (program name followed
+// by its arguments) and returns a buffered reader over its standard output.
+// The started command is recorded in ext.Cmd so that Wait can reap it.
+// When the -keep flag is set the command line is logged first.
+func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
+	if *keep {
+		log.Printf("%s\n", strings.Join(cmd, " "))
+	}
+
+	program, args := cmd[0], cmd[1:]
+	proc := exec.Command(program, args...)
+	ext.Cmd = proc
+
+	stdout, err := proc.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("stdoutpipe: %v", err)
+	}
+	if err = proc.Start(); err != nil {
+		return nil, fmt.Errorf("exec: %v", err)
+	}
+
+	// Use a 1 MB read buffer on the disassembler's output.
+	return bufio.NewReaderSize(stdout, 1<<20), nil
+}
+
+// Wait blocks until the command started by Run has exited and returns
+// the error reported by that command's Wait method.
+func (ext *ExtDis) Wait() error {
+	proc := ext.Cmd
+	return proc.Wait()
+}
+
+// testExtDis tests a set of byte sequences against an external disassembler.
+// The disassembler is expected to produce the given syntax and be run
+// in the given architecture mode (16, 32, or 64-bit).
+// The extdis function must start the external disassembler
+// and then parse its output, sending the parsed instructions on ext.Dec.
+// The generate function calls its argument f once for each byte sequence
+// to be tested. The generate function itself will be called twice, and it must
+// make the same sequence of calls to f each time.
+// When a disassembly does not match the internal decoding,
+// allowedMismatch determines whether this mismatch should be
+// allowed, or else considered an error.
+//
+// At most 100 mismatches are logged; once that many have been collected,
+// later ones randomly replace earlier ones. The test fails if any mismatch
+// was not allowed, or if extdis returns an error.
+func testExtDis(
+	t *testing.T,
+	syntax string,
+	arch int,
+	extdis func(ext *ExtDis) error,
+	generate func(f func([]byte)),
+	allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
+) {
+	start := time.Now()
+	ext := &ExtDis{
+		Dec:  make(chan ExtInst),
+		Arch: arch,
+	}
+	errc := make(chan error)
+
+	// First pass: write instructions to input file for external disassembler.
+	file, f, size, err := writeInst(generate)
+	if err != nil {
+		t.Fatal(err)
+	}
+	ext.Size = size
+	ext.File = f
+	defer func() {
+		f.Close()
+		if !*keep {
+			os.Remove(file)
+		}
+	}()
+
+	// Second pass: compare disassembly against our decodings.
+	var (
+		totalTests  = 0
+		totalSkips  = 0
+		totalErrors = 0
+
+		errors = make([]string, 0, 100) // sampled errors, at most cap
+	)
+	// The external disassembler runs concurrently and feeds ext.Dec;
+	// its final status is collected from errc at the end.
+	go func() {
+		errc <- extdis(ext)
+	}()
+	generate(func(enc []byte) {
+		dec, ok := <-ext.Dec
+		if !ok {
+			t.Errorf("decoding stream ended early")
+			return
+		}
+		inst, text := disasm(syntax, arch, pad(enc))
+		totalTests++
+		if *dumpTest {
+			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
+		}
+		if text != dec.text || inst.Len != dec.nenc {
+			suffix := ""
+			if allowedMismatch(text, size, &inst, dec) {
+				totalSkips++
+				if !*mismatch {
+					return
+				}
+				suffix += " (allowed mismatch)"
+			}
+			totalErrors++
+			if len(errors) >= cap(errors) {
+				// The sample is full: keep this error only with
+				// probability cap/totalErrors, evicting a random
+				// earlier one to make room.
+				j := rand.Intn(totalErrors)
+				if j >= cap(errors) {
+					return
+				}
+				errors = append(errors[:j], errors[j+1:]...)
+			}
+			errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
+		}
+	})
+
+	if *mismatch {
+		// Allowed mismatches were counted as errors only so that
+		// they would be logged; they must not fail the test.
+		totalErrors -= totalSkips
+	}
+
+	for _, b := range errors {
+		t.Log(b)
+	}
+
+	if totalErrors > 0 {
+		t.Fail()
+	}
+	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
+
+	if err := <-errc; err != nil {
+		// Fatalf, not Fatal: the message contains a formatting verb.
+		t.Fatalf("external disassembler: %v", err)
+	}
+
+}
+
+const start = 0x8000 // start address of text
+
+// writeInst writes the generated byte sequences to a new file
+// starting at offset start. That file is intended to be the input to
+// the external disassembler.
+//
+// Each sequence is truncated to at most 16 bytes and then padded with
+// single-byte pops to exactly len(pops) bytes, so every record has the
+// same size. On success writeInst returns the file's name, the open file,
+// and the total number of bytes written after offset start; the caller
+// owns the file and must close and remove it. On failure the temporary
+// file is closed and removed and only err is meaningful.
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
+	f, err = ioutil.TempFile("", "x86map")
+	if err != nil {
+		return
+	}
+
+	file = f.Name()
+
+	// Do not leak the temporary file if anything below fails.
+	defer func() {
+		if err != nil {
+			f.Close()
+			os.Remove(file)
+			file, f, size = "", nil, 0
+		}
+	}()
+
+	if _, err = f.Seek(start, 0); err != nil {
+		return
+	}
+	w := bufio.NewWriter(f)
+	size = 0
+	generate(func(x []byte) {
+		if len(x) > 16 {
+			x = x[:16]
+		}
+		if debug {
+			fmt.Printf("%#x: %x%x\n", start+size, x, pops[len(x):])
+		}
+		// Write errors are sticky in bufio.Writer and are reported
+		// by the Flush below, so they need no check here.
+		w.Write(x)
+		w.Write(pops[len(x):])
+		size += len(pops)
+	})
+	// Flush explicitly so that a failed write is returned to the caller
+	// instead of silently producing a short input file.
+	err = w.Flush()
+	return
+}
+
+// pops is 32 copies of 0x5F, a single-byte pop instruction.
+// The bytes we want decoded are followed by enough of these that,
+// whatever state the instruction stream is in after our bytes have
+// been read, the pops bring it back to a forced instruction boundary.
+var pops = bytes.Repeat([]byte{0x5f}, 32)
+
+// pad returns a new slice holding the code sequence enc followed by all
+// of pops. Neither enc nor its backing array is modified.
+func pad(enc []byte) []byte {
+	padded := make([]byte, len(enc), len(enc)+len(pops))
+	copy(padded, enc)
+	return append(padded, pops...)
+}
+
+// disasm decodes the first instruction in src for the given mode and
+// returns it together with its text in the given syntax ("gnu", "intel"
+// or "plan9"). A decoding failure, or an unknown syntax, is reported in
+// text as a string beginning with "error: ".
+//
+// If the -printtests flag is set, disasm compares the coverage value
+// before and after decoding and, when it went up, prints the input in the
+// format expected in testdata/decode.txt. This produces a fairly small
+// set of test cases that exercise nearly all the code.
+func disasm(syntax string, mode int, src []byte) (inst Inst, text string) {
+	var before float64
+	if *printTests {
+		before = coverage()
+	}
+
+	inst, err := decode1(src, mode, syntax == "gnu")
+	switch {
+	case err != nil:
+		text = "error: " + err.Error()
+	case syntax == "gnu":
+		text = GNUSyntax(inst)
+	case syntax == "intel":
+		text = IntelSyntax(inst)
+	case syntax == "plan9":
+		text = Plan9Syntax(inst, 0, nil)
+	default:
+		text = "error: unknown syntax " + syntax
+	}
+
+	if !*printTests {
+		return
+	}
+	if gained := coverage() - before; gained > 0 {
+		// Show at most 16 bytes of input unless the instruction
+		// itself is longer than that.
+		limit := len(src)
+		if limit > 16 && inst.Len <= 16 {
+			limit = 16
+		}
+		fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:limit], mode, syntax, text)
+	}
+	return
+}
+
+// coverage returns a floating point number denoting the
+// test coverage until now. The number increases when new code paths are exercised,
+// both in the Go program and in the decoder byte code.
+func coverage() float64 {
+	/*
+		testing.Coverage is not in the main distribution.
+		The implementation, which must go in package testing, is:
+
+		// Coverage reports the current code coverage as a fraction in the range [0, 1].
+		func Coverage() float64 {
+			var n, d int64
+			for _, counters := range cover.Counters {
+				for _, c := range counters {
+					if c > 0 {
+						n++
+					}
+					d++
+				}
+			}
+			if d == 0 {
+				return 0
+			}
+			return float64(n) / float64(d)
+		}
+	*/
+
+	var f float64
+	// f += testing.Coverage()
+	f += decodeCoverage()
+	return f
+}
+
+// decodeCoverage returns the fraction of decoderCover entries that are
+// set. One is added to both the count and the total, so the result is
+// positive and the division is well defined even when decoderCover is
+// empty.
+func decodeCoverage() float64 {
+	hit := 1
+	for i := range decoderCover {
+		if decoderCover[i] {
+			hit++
+		}
+	}
+	total := 1 + len(decoderCover)
+	return float64(hit) / float64(total)
+}
+
+// Helpers for writing disassembler output parsers.
+
+// isPrefix reports whether text is the name of an instruction prefix,
+// that is, whether it is a key of prefixByte.
+func isPrefix(text string) bool {
+	return prefixByte[text] > 0
+}
+
+// prefixByte maps instruction prefix text to actual prefix byte values.
+// Several names share a byte: 0xF2 and 0xF3 each go by more than one
+// name, and the operand-size (0x66) and address-size (0x67) overrides
+// have both a 16-bit and a 32-bit spelling.
+var prefixByte = map[string]byte{
+	"es":       0x26,
+	"cs":       0x2e,
+	"ss":       0x36,
+	"ds":       0x3e,
+	"fs":       0x64,
+	"gs":       0x65,
+	"data16":   0x66,
+	"addr16":   0x67,
+	"lock":     0xf0,
+	"repn":     0xf2,
+	"repne":    0xf2,
+	"rep":      0xf3,
+	"repe":     0xf3,
+	"xacquire": 0xf2,
+	"xrelease": 0xf3,
+	"bnd":      0xf2,
+	"addr32":   0x67, // same byte as addr16
+	"data32":   0x66, // same byte as data16
+}
+
+// hasPrefix reports whether any of the space-separated words in the text s
+// begins with any of the given prefixes. Words are delimited by single
+// space characters, and a match may extend past the end of a word.
+// An empty s never matches.
+func hasPrefix(s string, prefixes ...string) bool {
+	// rest always starts at the beginning of a word.
+	for rest := s; rest != ""; {
+		for _, p := range prefixes {
+			if strings.HasPrefix(rest, p) {
+				return true
+			}
+		}
+		sp := strings.Index(rest, " ")
+		if sp < 0 {
+			return false
+		}
+		rest = rest[sp+1:]
+	}
+	return false
+}
+
+// contains reports whether at least one of the given substrings
+// occurs somewhere in the text s.
+func contains(s string, substrings ...string) bool {
+	for i := 0; i < len(substrings); i++ {
+		if strings.Index(s, substrings[i]) >= 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
+// '0' is tested explicitly because its value in unhex is zero, the
+// same as that of every non-hex byte.
+func isHex(b byte) bool {
+	if unhex[b] > 0 {
+		return true
+	}
+	return b == '0'
+}
+
+// parseHex parses the hexadecimal byte dump in hex,
+// appending the parsed bytes to raw and returning the updated slice.
+// The returned bool is true if the whole dump was valid; if any invalid
+// hex is found, parseHex returns nil, false.
+// Spaces and tabs between bytes are okay but any other non-hex is not,
+// and the two digits of a byte must be adjacent.
+func parseHex(hex []byte, raw []byte) ([]byte, bool) {
+	hex = trimSpace(hex)
+	for pos := 0; pos < len(hex); {
+		if c := hex[pos]; c == ' ' || c == '\t' {
+			pos++
+			continue
+		}
+		// A byte needs two hex digits starting at pos.
+		if pos+2 > len(hex) || !isHex(hex[pos]) || !isHex(hex[pos+1]) {
+			return nil, false
+		}
+		hi, lo := unhex[hex[pos]], unhex[hex[pos+1]]
+		raw = append(raw, hi<<4|lo)
+		pos += 2
+	}
+	return raw, true
+}
+
+// unhex maps the ASCII code of a hexadecimal digit (0-9, A-F, a-f) to the
+// digit's value. Every other entry is zero, so a zero result alone does
+// not tell '0' apart from a non-hex byte.
+var unhex = [256]byte{
+	'0': 0,
+	'1': 1,
+	'2': 2,
+	'3': 3,
+	'4': 4,
+	'5': 5,
+	'6': 6,
+	'7': 7,
+	'8': 8,
+	'9': 9,
+	'A': 10,
+	'B': 11,
+	'C': 12,
+	'D': 13,
+	'E': 14,
+	'F': 15,
+	'a': 10,
+	'b': 11,
+	'c': 12,
+	'd': 13,
+	'e': 14,
+	'f': 15,
+}
+
+// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
+// It returns the offset of the first occurrence of t in s, or -1 if t
+// does not occur in s. As with bytes.Index, an empty t matches at 0.
+func index(s []byte, t string) int {
+	if t == "" {
+		// There is no first byte to search for.
+		return 0
+	}
+	for i := 0; ; i++ {
+		// Jump to the next candidate: an occurrence of t's first byte.
+		j := bytes.IndexByte(s[i:], t[0])
+		if j < 0 {
+			return -1
+		}
+		i += j
+		if i+len(t) > len(s) {
+			// Too little of s is left for t to fit here or later.
+			return -1
+		}
+		match := true
+		for k := 1; k < len(t); k++ {
+			if s[i+k] != t[k] {
+				match = false
+				break
+			}
+		}
+		if match {
+			return i
+		}
+	}
+}
+
+// fixSpace trims s and collapses every run of spaces, tabs, and newline
+// characters inside it into a single space.
+// If s must be rewritten, it is rewritten in place; the result always
+// shares s's backing array.
+func fixSpace(s []byte) []byte {
+	s = trimSpace(s)
+
+	// Fast path: leave s alone when it is already in canonical form.
+	dirty := false
+	for i, c := range s {
+		if c == '\t' || c == '\n' || (c == ' ' && i > 0 && s[i-1] == ' ') {
+			dirty = true
+			break
+		}
+	}
+	if !dirty {
+		return s
+	}
+
+	// Compact in place. n never runs ahead of the read position, so
+	// unread input is never overwritten.
+	n := 0
+	for _, c := range s {
+		if c == '\t' || c == '\n' {
+			c = ' '
+		}
+		if c == ' ' && n > 0 && s[n-1] == ' ' {
+			continue
+		}
+		s[n] = c
+		n++
+	}
+	if n > 0 && s[n-1] == ' ' {
+		n--
+	}
+	return s[:n]
+}
+
+// trimSpace trims leading and trailing space from s, returning a subslice of s.
+// Spaces, tabs and newlines are removed from the end; only spaces and
+// tabs are removed from the start.
+func trimSpace(s []byte) []byte {
+	end := len(s)
+	for ; end > 0; end-- {
+		if c := s[end-1]; c != ' ' && c != '\t' && c != '\n' {
+			break
+		}
+	}
+	begin := 0
+	for ; begin < end; begin++ {
+		if c := s[begin]; c != ' ' && c != '\t' {
+			break
+		}
+	}
+	return s[begin:end]
+}
+
+// pcrel and pcrelw match instructions using relative addressing mode.
+// Both match a whole line of disassembly that ends in a 0x-prefixed
+// lower-case hex operand. Submatch 1 is everything before that operand:
+// optional leading words, the mnemonic, and an optional ",pn" or ",pt"
+// suffix. Submatch 2 is the operand's hex digits.
+// pcrel accepts jumps (j...), call, ljmp, the loop family and xbegin,
+// optionally followed by q; pcrelw accepts callw, jmpw, xbeginw and ljmpw.
+var (
+	pcrel  = regexp.MustCompile(`^((?:.* )?(?:j[a-z]+|call|ljmp|loopn?e?w?|xbegin)q?(?:,p[nt])?) 0x([0-9a-f]+)$`)
+	pcrelw = regexp.MustCompile(`^((?:.* )?(?:callw|jmpw|xbeginw|ljmpw)(?:,p[nt])?) 0x([0-9a-f]+)$`)
+)
+
+// Generators.
+//
+// The test cases are described as functions that invoke a callback repeatedly,
+// with a new input sequence each time. These helpers make writing those
+// a little easier.
+
+// hexCases returns a generator for the cases written in hexadecimal in
+// the encoded string. White space in 'encoded' separates entire test
+// cases, not individual bytes.
+// A case that is not valid hex is reported through t each time the
+// generator runs, and whatever was decoded of it is still passed on.
+func hexCases(t *testing.T, encoded string) func(func([]byte)) {
+	words := strings.Fields(encoded)
+	return func(try func([]byte)) {
+		for i := range words {
+			word := words[i]
+			src, err := hex.DecodeString(word)
+			if err != nil {
+				t.Errorf("parsing %q: %v", word, err)
+			}
+			try(src)
+		}
+	}
+}
+
+// testdataCases returns a generator for the inputs recorded in
+// testdata/decode.txt. The file is read once, when testdataCases is
+// called. Only the first field of each line is used, with its '|'
+// marker removed; the recorded answers are ignored.
+// Blank lines and lines starting with '#' are skipped. Lines whose first
+// field cannot be parsed are reported through t and left out.
+func testdataCases(t *testing.T) func(func([]byte)) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var codes [][]byte
+	for _, raw := range strings.Split(string(data), "\n") {
+		line := strings.TrimSpace(raw)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		field := strings.Fields(line)[0]
+		bar := strings.Index(field, "|")
+		if bar < 0 {
+			t.Errorf("parsing %q: missing | separator", field)
+			continue
+		}
+		if bar%2 != 0 {
+			t.Errorf("parsing %q: misaligned | separator", field)
+		}
+		code, err := hex.DecodeString(field[:bar] + field[bar+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", field, err)
+			continue
+		}
+		codes = append(codes, code)
+	}
+
+	return func(try func([]byte)) {
+		for i := range codes {
+			try(codes[i])
+		}
+	}
+}
+
+// manyPrefixes generates all possible 2⁹ combinations of nine chosen prefixes.
+// The relative ordering of the prefixes within the combinations varies
+// deterministically: the first prefix is swapped with the one at position
+// (combination number mod length).
+// The slice passed to try is reused from one call to the next.
+func manyPrefixes(try func([]byte)) {
+	prefixBytes := [...]byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36, 0x66, 0x67}
+	var buf []byte
+	total := 1 << uint(len(prefixBytes))
+	for mask := 0; mask < total; mask++ {
+		buf = buf[:0]
+		for bit := range prefixBytes {
+			if (mask>>uint(bit))&1 == 1 {
+				buf = append(buf, prefixBytes[bit])
+			}
+		}
+		if n := len(buf); n > 0 {
+			k := mask % n
+			buf[0], buf[k] = buf[k], buf[0]
+		}
+		try(buf)
+	}
+}
+
+// basicPrefixes geneartes 8 different possible prefix cases: no prefix
+// and then one each of seven different prefix bytes.
+func basicPrefixes(try func([]byte)) {
+	try(nil)
+	for _, b := range []byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36} {
+		try([]byte{b})
+	}
+}
+
+func rexPrefixes(try func([]byte)) {
+	try(nil)
+	for _, b := range []byte{0x40, 0x48, 0x43, 0x4C} {
+		try([]byte{b})
+	}
+}
+
+// concat takes two generators and returns a generator for the
+// cross product of the two, concatenating the results from each.
+// gen2 is run afresh for every sequence that gen1 produces.
+func concat(gen1, gen2 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(enc1 []byte) {
+			// Cap the capacity so that appending can never write
+			// into gen1's buffer.
+			head := enc1[:len(enc1):len(enc1)]
+			gen2(func(enc2 []byte) {
+				joined := append(head, enc2...)
+				try(joined)
+			})
+		})
+	}
+}
+
+// concat3 takes three generators and returns a generator for the
+// cross product of the three, concatenating the results from each.
+// The last generator varies fastest.
+func concat3(gen1, gen2, gen3 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(enc1 []byte) {
+			// Cap the capacity so that appending can never write
+			// into gen1's buffer.
+			head := enc1[:len(enc1):len(enc1)]
+			gen2(func(enc2 []byte) {
+				gen3(func(enc3 []byte) {
+					joined := append(head, enc2...)
+					joined = append(joined, enc3...)
+					try(joined)
+				})
+			})
+		})
+	}
+}
+
+// concat4 takes four generators and returns a generator for the
+// cross product of the four, concatenating the results from each.
+// The last generator varies fastest.
+func concat4(gen1, gen2, gen3, gen4 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(enc1 []byte) {
+			// Cap the capacity so that appending can never write
+			// into gen1's buffer.
+			head := enc1[:len(enc1):len(enc1)]
+			gen2(func(enc2 []byte) {
+				gen3(func(enc3 []byte) {
+					gen4(func(enc4 []byte) {
+						joined := append(head, enc2...)
+						joined = append(joined, enc3...)
+						joined = append(joined, enc4...)
+						try(joined)
+					})
+				})
+			})
+		})
+	}
+}
+
+// filter returns a generator that produces the sequences from gen for
+// which ok returns true, in their original order.
+func filter(gen func(func([]byte)), ok func([]byte) bool) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen(func(enc []byte) {
+			if !ok(enc) {
+				return
+			}
+			try(enc)
+		})
+	}
+}
+
+// enum8bit generates all possible 1-byte sequences, in increasing order,
+// each followed by the distinctive padding 11 22 33 44 55 66 77 88.
+// Every call to try receives a freshly allocated slice.
+func enum8bit(try func([]byte)) {
+	tail := [...]byte{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}
+	for v := 0; v <= 0xff; v++ {
+		enc := make([]byte, 1, 1+len(tail))
+		enc[0] = byte(v)
+		try(append(enc, tail[:]...))
+	}
+}
+
+// enum16bit generates all possible 2-byte sequences, each followed by the
+// distinctive padding 11 22 33 44 55 66 77 88. The first byte varies
+// fastest. Every call to try receives a freshly allocated slice.
+func enum16bit(try func([]byte)) {
+	tail := [...]byte{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}
+	for v := 0; v <= 0xffff; v++ {
+		enc := make([]byte, 2, 2+len(tail))
+		enc[0] = byte(v)
+		enc[1] = byte(v >> 8)
+		try(append(enc, tail[:]...))
+	}
+}
+
+// enum24bit generates all possible 3-byte sequences, each followed by the
+// distinctive padding 11 22 33 44 55 66 77 88. The first byte varies
+// fastest. Every call to try receives a freshly allocated slice.
+func enum24bit(try func([]byte)) {
+	tail := [...]byte{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}
+	for v := 0; v <= 0xffffff; v++ {
+		enc := make([]byte, 3, 3+len(tail))
+		enc[0] = byte(v)
+		enc[1] = byte(v >> 8)
+		enc[2] = byte(v >> 16)
+		try(append(enc, tail[:]...))
+	}
+}
+
+// enumModRM generates all possible modrm bytes and, for modrm values that indicate
+// a following sib byte, all possible modrm, sib combinations.
+//
+// Every sequence is generated twice, once led by the byte 0 and once by
+// the byte 1 (the byte and word forms of an opcode), and is followed by
+// the distinctive padding 11 22 33 44 55 66 77 88.
+func enumModRM(try func([]byte)) {
+	for i := 0; i < 256; i++ {
+		// A modrm byte is mod (bits 7-6), reg (bits 5-3), rm (bits 2-0).
+		// A sib byte follows when rm is 4 and mod is not 3; the reg
+		// field plays no part in that decision.
+		if i&07 == 04 && i>>6 != 3 { // has sib
+			for j := 0; j < 256; j++ {
+				try([]byte{0, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
+				try([]byte{1, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
+			}
+		} else {
+			try([]byte{0, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
+			try([]byte{1, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
+		}
+	}
+}
+
+// fixed returns a generator that produces exactly one case, the byte
+// sequence b, passed along without copying.
+// It's mainly useful to prepare an argument for concat or concat3.
+func fixed(b ...byte) func(func([]byte)) {
+	only := b
+	return func(emit func([]byte)) {
+		emit(only)
+	}
+}
+
+// testBasic runs the given test function with cases all using opcode as the initial opcode bytes.
+// It runs three phases:
+//
+// First, zero-or-one prefixes followed by opcode followed by all possible 1-byte values.
+// If in -short mode, that's all.
+//
+// Second, zero-or-one prefixes followed by opcode followed by all possible 2-byte values.
+// If not in -long mode, that's all. This phase and the next run in parallel with other tests
+// (using t.Parallel).
+//
+// Finally, opcode followed by all possible 3-byte values. The test can take a very long time
+// and prints progress messages to package log.
+func testBasic(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
+	testfn(t, concat3(basicPrefixes, fixed(opcode...), enum8bit))
+	if testing.Short() {
+		return
+	}
+
+	t.Parallel()
+	testfn(t, concat3(basicPrefixes, fixed(opcode...), enum16bit))
+	if !*longTest {
+		return
+	}
+
+	// caller(2) names the function that called testBasic, so this call
+	// must stay directly in testBasic's body.
+	name := caller(2)
+	// The 3-byte phase is split into 256 runs: the first extra byte is
+	// fixed as a suffix of the opcode and enum16bit supplies the rest.
+	op1 := make([]byte, len(opcode)+1)
+	copy(op1, opcode)
+	for i := 0; i < 256; i++ {
+		log.Printf("%s 24-bit: %d/256\n", name, i)
+		op1[len(opcode)] = byte(i)
+		testfn(t, concat(fixed(op1...), enum16bit))
+	}
+}
+
+// testBasicREX is like testBasic, but each case also has one of the
+// rexPrefixes choices placed between the basic prefix and the opcode,
+// and only cases accepted by isValidREX are run.
+// The final -long phase uses the REX choices without a basic prefix.
+func testBasicREX(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
+	testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum8bit), isValidREX))
+	if testing.Short() {
+		return
+	}
+
+	t.Parallel()
+	testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum16bit), isValidREX))
+	if !*longTest {
+		return
+	}
+
+	// caller(2) names the function that called testBasicREX, so this
+	// call must stay directly in testBasicREX's body.
+	name := caller(2)
+	// The 3-byte phase is split into 256 runs: the first extra byte is
+	// fixed as a suffix of the opcode and enum16bit supplies the rest.
+	op1 := make([]byte, len(opcode)+1)
+	copy(op1, opcode)
+	for i := 0; i < 256; i++ {
+		log.Printf("%s 24-bit: %d/256\n", name, i)
+		op1[len(opcode)] = byte(i)
+		testfn(t, filter(concat3(rexPrefixes, fixed(op1...), enum16bit), isValidREX))
+	}
+}
+
+// testPrefix runs the given test function for all many prefix possibilities
+// followed by all possible 1-byte sequences.
+//
+// If in -long mode, it then runs a test of all the prefix possibilities followed
+// by all possible 2-byte sequences.
+//
+// The test is marked parallel before anything is run.
+func testPrefix(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
+	t.Parallel()
+	testfn(t, concat(manyPrefixes, enum8bit))
+	if testing.Short() || !*longTest {
+		return
+	}
+
+	// caller(2) names the function that called testPrefix, so this call
+	// must stay directly in testPrefix's body.
+	name := caller(2)
+	// The 2-byte phase is split into 256 runs, one per first byte.
+	for i := 0; i < 256; i++ {
+		log.Printf("%s 16-bit: %d/256\n", name, i)
+		testfn(t, concat3(manyPrefixes, fixed(byte(i)), enum8bit))
+	}
+}
+
+// testPrefixREX is like testPrefix, but each case also has one of the
+// rexPrefixes choices placed after the prefixes, and only cases accepted
+// by isValidREX are run.
+func testPrefixREX(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
+	t.Parallel()
+	testfn(t, filter(concat3(manyPrefixes, rexPrefixes, enum8bit), isValidREX))
+	if testing.Short() || !*longTest {
+		return
+	}
+
+	// caller(2) names the function that called testPrefixREX, so this
+	// call must stay directly in testPrefixREX's body.
+	name := caller(2)
+	// The 2-byte phase is split into 256 runs, one per first byte.
+	for i := 0; i < 256; i++ {
+		log.Printf("%s 16-bit: %d/256\n", name, i)
+		testfn(t, filter(concat4(manyPrefixes, rexPrefixes, fixed(byte(i)), enum8bit), isValidREX))
+	}
+}
+
+// caller returns the unqualified name of a function on the call stack:
+// the text after the last '.' in the name reported by the runtime.
+// skip is passed to runtime.Caller as is, so 1 selects the function that
+// called caller and 2 selects that function's caller.
+// It returns "?" if no function is found for that frame.
+func caller(skip int) string {
+	pc, _, _, _ := runtime.Caller(skip)
+	fn := runtime.FuncForPC(pc)
+	if fn == nil {
+		return "?"
+	}
+	full := fn.Name()
+	dot := strings.LastIndex(full, ".")
+	if dot < 0 {
+		return full
+	}
+	return full[dot+1:]
+}
+
+// isValidREX reports whether a REX prefix in x, if present, is in a
+// usable position. It skips the leading prefix bytes recognized by
+// isPrefixByte; if the next byte is a REX prefix, the byte after it must
+// be neither another prefix byte nor another REX prefix.
+// Sequences with no REX prefix at that point, and sequences that end
+// right after it, are accepted.
+func isValidREX(x []byte) bool {
+	n := 0
+	for n < len(x) && isPrefixByte(x[n]) {
+		n++
+	}
+	if n >= len(x) || !Prefix(x[n]).IsREX() {
+		return true
+	}
+	n++ // step over the REX prefix
+	if n >= len(x) {
+		return true
+	}
+	next := x[n]
+	return !isPrefixByte(next) && !Prefix(next).IsREX()
+}
+
+// isPrefixByte reports whether b is one of the prefix bytes
+// 26, 2E, 36, 3E, 64, 65, 66, 67, F0, F2 or F3 (hex).
+func isPrefixByte(b byte) bool {
+	known := [...]byte{0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0x66, 0x67, 0xF0, 0xF2, 0xF3}
+	for _, p := range known {
+		if b == p {
+			return true
+		}
+	}
+	return false
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/gnu.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/gnu.go
@ -0,0 +1,926 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
+// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix.
+//
+// The function works in stages: it first rewrites the opcode, implicit
+// arguments and prefix flags of its private copy of inst to mimic libopcodes,
+// then chooses the mnemonic and its size suffix, formats the arguments
+// (reversing them from Intel order in most cases), and finally prepends the
+// prefixes that are not marked PrefixImplicit.
+func GNUSyntax(inst Inst) string {
+	// Rewrite instruction to mimic GNU peculiarities.
+	// Note that inst has been passed by value and contains
+	// no pointers, so any changes we make here are local
+	// and will not propagate back out to the caller.
+
+	// Adjust opcode [sic].
+	switch inst.Op {
+	case FDIV, FDIVR, FSUB, FSUBR, FDIVP, FDIVRP, FSUBP, FSUBRP:
+		// DC E0, DC F0: libopcodes swaps FSUBR/FSUB and FDIVR/FDIV, at least
+		// if you believe the Intel manual is correct (the encoding is irregular as given;
+		// libopcodes uses the more regular expected encoding).
+		// TODO(rsc): Test to ensure Intel manuals are correct and report to libopcodes maintainers?
+		// NOTE: iant thinks this is deliberate, but we can't find the history.
+		_, reg1 := inst.Args[0].(Reg)
+		_, reg2 := inst.Args[1].(Reg)
+		if reg1 && reg2 && (inst.Opcode>>24 == 0xDC || inst.Opcode>>24 == 0xDE) {
+			switch inst.Op {
+			case FDIV:
+				inst.Op = FDIVR
+			case FDIVR:
+				inst.Op = FDIV
+			case FSUB:
+				inst.Op = FSUBR
+			case FSUBR:
+				inst.Op = FSUB
+			case FDIVP:
+				inst.Op = FDIVRP
+			case FDIVRP:
+				inst.Op = FDIVP
+			case FSUBP:
+				inst.Op = FSUBRP
+			case FSUBRP:
+				inst.Op = FSUBP
+			}
+		}
+
+	case MOVNTSD:
+		// MOVNTSD is F2 0F 2B /r.
+		// MOVNTSS is F3 0F 2B /r (supposedly; not in manuals).
+		// Usually inner prefixes win for display,
+		// so that F3 F2 0F 2B 11 is REP MOVNTSD
+		// and F2 F3 0F 2B 11 is REPN MOVNTSS.
+		// Libopcodes always prefers MOVNTSS regardless of prefix order.
+		if countPrefix(&inst, 0xF3) > 0 {
+			// Hide the last F3 (it now selects the opcode) and
+			// make every F2 visible again.
+			found := false
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				switch inst.Prefix[i] & 0xFF {
+				case 0xF3:
+					if !found {
+						found = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case 0xF2:
+					inst.Prefix[i] &^= PrefixImplicit
+				}
+			}
+			inst.Op = MOVNTSS
+		}
+	}
+
+	// Add implicit arguments.
+	switch inst.Op {
+	case MONITOR:
+		inst.Args[0] = EDX
+		inst.Args[1] = ECX
+		inst.Args[2] = EAX
+		if inst.AddrSize == 16 {
+			inst.Args[2] = AX
+		}
+
+	case MWAIT:
+		if inst.Mode == 64 {
+			inst.Args[0] = RCX
+			inst.Args[1] = RAX
+		} else {
+			inst.Args[0] = ECX
+			inst.Args[1] = EAX
+		}
+	}
+
+	// Adjust which prefixes will be displayed.
+	// The rule is to display all the prefixes not implied by
+	// the usual instruction display, that is, all the prefixes
+	// except the ones with PrefixImplicit set.
+	// However, of course, there are exceptions to the rule.
+	switch inst.Op {
+	case CRC32:
+		// CRC32 has a mandatory F2 prefix.
+		// If there are multiple F2s and no F3s, the extra F2s do not print.
+		// (And Decode has already marked them implicit.)
+		// However, if there is an F3 anywhere, then the extra F2s do print.
+		// If there are multiple F2 prefixes *and* an (ignored) F3,
+		// then libopcodes prints the extra F2s as REPNs.
+		if countPrefix(&inst, 0xF2) > 1 {
+			unmarkImplicit(&inst, 0xF2)
+			markLastImplicit(&inst, 0xF2)
+		}
+
+		// An unused data size override should probably be shown,
+		// to distinguish DATA16 CRC32B from plain CRC32B,
+		// but libopcodes always treats the final override as implicit
+		// and the others as explicit.
+		unmarkImplicit(&inst, PrefixDataSize)
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case CVTSI2SD, CVTSI2SS:
+		if !isMem(inst.Args[1]) {
+			markLastImplicit(&inst, PrefixDataSize)
+		}
+
+	case CVTSD2SI, CVTSS2SI, CVTTSD2SI, CVTTSS2SI,
+		ENTER, FLDENV, FNSAVE, FNSTENV, FRSTOR, LGDT, LIDT, LRET,
+		POP, PUSH, RET, SGDT, SIDT, SYSRET, XBEGIN:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case LOOP, LOOPE, LOOPNE, MONITOR:
+		markLastImplicit(&inst, PrefixAddrSize)
+
+	case MOV:
+		// The 16-bit and 32-bit forms of MOV Sreg, dst and MOV src, Sreg
+		// cannot be distinguished when src or dst refers to memory, because
+		// Sreg is always a 16-bit value, even when we're doing a 32-bit
+		// instruction. Because the instruction tables distinguished these two,
+		// any operand size prefix has been marked as used (to decide which
+		// branch to take). Unmark it, so that it will show up in disassembly,
+		// so that the reader can tell the size of the memory operand.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= src && src <= GS && isMem(inst.Args[0]) || ES <= dst && dst <= GS && isMem(inst.Args[1]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case MOVDQU:
+		if countPrefix(&inst, 0xF3) > 1 {
+			unmarkImplicit(&inst, 0xF3)
+			markLastImplicit(&inst, 0xF3)
+		}
+
+	case MOVQ2DQ:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case SLDT, SMSW, STR, FXRSTOR, XRSTOR, XSAVE, XSAVEOPT, CMPXCHG8B:
+		if isMem(inst.Args[0]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case SYSEXIT:
+		unmarkImplicit(&inst, PrefixDataSize)
+	}
+
+	// For conditional jumps and loops that carry both a CS and a DS
+	// prefix, turn any branch-prediction prefix back into the plain
+	// segment prefix it was decoded from.
+	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		if countPrefix(&inst, PrefixCS) > 0 && countPrefix(&inst, PrefixDS) > 0 {
+			for i, p := range inst.Prefix {
+				switch p & 0xFFF {
+				case PrefixPN, PrefixPT:
+					inst.Prefix[i] &= 0xF0FF // cut interpretation bits, producing original segment prefix
+				}
+			}
+		}
+	}
+
+	// XACQUIRE/XRELEASE adjustment.
+	if inst.Op == MOV {
+		// MOV into memory is a candidate for turning REP into XRELEASE.
+		// However, if the REP is followed by a REPN, that REPN blocks the
+		// conversion.
+		haveREPN := false
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			switch inst.Prefix[i] &^ PrefixIgnored {
+			case PrefixREPN:
+				haveREPN = true
+			case PrefixXRELEASE:
+				if haveREPN {
+					inst.Prefix[i] = PrefixREP
+				}
+			}
+		}
+	}
+
+	// We only format the final F2/F3 as XRELEASE/XACQUIRE.
+	// Scanning backward, the first one seen of each kind is kept and
+	// earlier ones are demoted to plain REP/REPN.
+	haveXA := false
+	haveXR := false
+	for i := len(inst.Prefix) - 1; i >= 0; i-- {
+		switch inst.Prefix[i] &^ PrefixIgnored {
+		case PrefixXRELEASE:
+			if !haveXR {
+				haveXR = true
+			} else {
+				inst.Prefix[i] = PrefixREP
+			}
+
+		case PrefixXACQUIRE:
+			if !haveXA {
+				haveXA = true
+			} else {
+				inst.Prefix[i] = PrefixREPN
+			}
+		}
+	}
+
+	// Determine opcode.
+	// The default is the lowercased Intel mnemonic; gnuOp overrides it
+	// for mnemonics that GNU spells differently.
+	op := strings.ToLower(inst.Op.String())
+	if alt := gnuOp[inst.Op]; alt != "" {
+		op = alt
+	}
+
+	// Determine opcode suffix.
+	// Libopcodes omits the suffix if the width of the operation
+	// can be inferred from a register argument. For example,
+	// add $1, %ebx has no suffix because you can tell from the
+	// 32-bit register destination that it is a 32-bit add,
+	// but in addl $1, (%ebx), the destination is memory, so the
+	// size is not evident without the l suffix.
+	needSuffix := true
+SuffixLoop:
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch a := a.(type) {
+		case Reg:
+			switch inst.Op {
+			case MOVSX, MOVZX:
+				continue
+
+			case SHL, SHR, RCL, RCR, ROL, ROR, SAR:
+				if i == 1 {
+					// shift count does not tell us operand size
+					continue
+				}
+
+			case CRC32:
+				// The source argument does tell us operand size,
+				// but libopcodes still always puts a suffix on crc32.
+				continue
+
+			case PUSH, POP:
+				// Even though segment registers are 16-bit, push and pop
+				// can save/restore them from 32-bit slots, so they
+				// do not imply operand size.
+				if ES <= a && a <= GS {
+					continue
+				}
+
+			case CVTSI2SD, CVTSI2SS:
+				// The integer register argument takes priority.
+				if X0 <= a && a <= X15 {
+					continue
+				}
+			}
+
+			if AL <= a && a <= R15 || ES <= a && a <= GS || X0 <= a && a <= X15 || M0 <= a && a <= M7 {
+				needSuffix = false
+				break SuffixLoop
+			}
+		}
+	}
+
+	if needSuffix {
+		switch inst.Op {
+		case CMPXCHG8B, FLDCW, FNSTCW, FNSTSW, LDMXCSR, LLDT, LMSW, LTR, PCLMULQDQ,
+			SETA, SETAE, SETB, SETBE, SETE, SETG, SETGE, SETL, SETLE, SETNE, SETNO, SETNP, SETNS, SETO, SETP, SETS,
+			SLDT, SMSW, STMXCSR, STR, VERR, VERW:
+			// For various reasons, libopcodes emits no suffix for these instructions.
+
+		case CRC32:
+			op += byteSizeSuffix(argBytes(&inst, inst.Args[1]))
+
+		case LGDT, LIDT, SGDT, SIDT:
+			op += byteSizeSuffix(inst.DataSize / 8)
+
+		case MOVZX, MOVSX:
+			// Integer size conversions get two suffixes.
+			op = op[:4] + byteSizeSuffix(argBytes(&inst, inst.Args[1])) + byteSizeSuffix(argBytes(&inst, inst.Args[0]))
+
+		case LOOP, LOOPE, LOOPNE:
+			// Add w suffix to indicate use of CX register instead of ECX.
+			if inst.AddrSize == 16 {
+				op += "w"
+			}
+
+		case CALL, ENTER, JMP, LCALL, LEAVE, LJMP, LRET, RET, SYSRET, XBEGIN:
+			// Add w suffix to indicate use of 16-bit target.
+			// Exclude JMP rel8.
+			if inst.Opcode>>24 == 0xEB {
+				break
+			}
+			if inst.DataSize == 16 && inst.Mode != 16 {
+				markLastImplicit(&inst, PrefixDataSize)
+				op += "w"
+			} else if inst.Mode == 64 {
+				op += "q"
+			}
+
+		case FRSTOR, FNSAVE, FNSTENV, FLDENV:
+			// Add s suffix to indicate shortened FPU state (I guess).
+			if inst.DataSize == 16 {
+				op += "s"
+			}
+
+		case PUSH, POP:
+			if markLastImplicit(&inst, PrefixDataSize) {
+				op += byteSizeSuffix(inst.DataSize / 8)
+			} else if inst.Mode == 64 {
+				op += "q"
+			} else {
+				op += byteSizeSuffix(inst.MemBytes)
+			}
+
+		default:
+			if isFloat(inst.Op) {
+				// I can't explain any of this, but it's what libopcodes does.
+				switch inst.MemBytes {
+				default:
+					if (inst.Op == FLD || inst.Op == FSTP) && isMem(inst.Args[0]) {
+						op += "t"
+					}
+				case 4:
+					if isFloatInt(inst.Op) {
+						op += "l"
+					} else {
+						op += "s"
+					}
+				case 8:
+					if isFloatInt(inst.Op) {
+						op += "ll"
+					} else {
+						op += "l"
+					}
+				}
+				break
+			}
+
+			op += byteSizeSuffix(inst.MemBytes)
+		}
+	}
+
+	// Adjust special case opcodes.
+	switch inst.Op {
+	case 0:
+		// No opcode at all: print just the first prefix, if there is one.
+		if inst.Prefix[0] != 0 {
+			return strings.ToLower(inst.Prefix[0].String())
+		}
+
+	case INT:
+		if inst.Opcode>>24 == 0xCC {
+			inst.Args[0] = nil
+			op = "int3"
+		}
+
+	case CMPPS, CMPPD, CMPSD_XMM, CMPSS:
+		// Fold a predicate immediate in the range 0-7 into the mnemonic,
+		// replacing its leading "cmp" with the cmppsOps entry.
+		imm, ok := inst.Args[2].(Imm)
+		if ok && 0 <= imm && imm < 8 {
+			inst.Args[2] = nil
+			op = cmppsOps[imm] + op[3:]
+		}
+
+	case PCLMULQDQ:
+		// Fold an immediate that uses only bits 0 and 4 into the mnemonic.
+		imm, ok := inst.Args[2].(Imm)
+		if ok && imm&^0x11 == 0 {
+			inst.Args[2] = nil
+			op = pclmulqOps[(imm&0x10)>>3|(imm&1)]
+		}
+
+	case XLATB:
+		if markLastImplicit(&inst, PrefixAddrSize) {
+			op = "xlat" // not xlatb
+		}
+	}
+
+	// Build list of argument strings.
+	var (
+		usedPrefixes bool     // segment prefixes consumed by Mem formatting
+		args         []string // formatted arguments
+	)
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, OUTSB, OUTSW, OUTSD:
+			if i == 0 {
+				usedPrefixes = true // disable use of prefixes for first argument
+			} else {
+				usedPrefixes = false
+			}
+		}
+		// Opcodes D0 and D1 carry an implied count of 1; do not print it.
+		if a == Imm(1) && (inst.Opcode>>24)&^1 == 0xD0 {
+			continue
+		}
+		args = append(args, gnuArg(&inst, a, &usedPrefixes))
+	}
+
+	// The default is to print the arguments in reverse Intel order.
+	// A few instructions inhibit this behavior.
+	switch inst.Op {
+	case BOUND, LCALL, ENTER, LJMP:
+		// no reverse
+	default:
+		// reverse args
+		for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
+			args[i], args[j] = args[j], args[i]
+		}
+	}
+
+	// Build prefix string.
+	// Must be after argument formatting, which can turn off segment prefixes.
+	var (
+		prefix       = "" // output string
+		numAddr      = 0  // address size prefixes printed so far
+		numData      = 0  // data size prefixes printed so far
+		implicitData = false
+	)
+	for _, p := range inst.Prefix {
+		if p&0xFF == PrefixDataSize && p&PrefixImplicit != 0 {
+			implicitData = true
+		}
+	}
+	for _, p := range inst.Prefix {
+		if p == 0 {
+			break
+		}
+		if p&PrefixImplicit != 0 {
+			continue
+		}
+		switch p &^ (PrefixIgnored | PrefixInvalid) {
+		default:
+			if p.IsREX() {
+				if p&0xFF == PrefixREX {
+					prefix += "rex "
+				} else {
+					// p.String() is "REX." followed by the flag letters;
+					// keep the letters in upper case.
+					prefix += "rex." + p.String()[4:] + " "
+				}
+				break
+			}
+			prefix += strings.ToLower(p.String()) + " "
+
+		case PrefixPN:
+			// Branch hints are written as a suffix on the mnemonic.
+			op += ",pn"
+			continue
+
+		case PrefixPT:
+			op += ",pt"
+			continue
+
+		case PrefixAddrSize, PrefixAddr16, PrefixAddr32:
+			// For unknown reasons, if the addr16 prefix is repeated,
+			// libopcodes displays all but the last as addr32, even though
+			// the addressing form used in a memory reference is clearly
+			// still 16-bit.
+			n := 32
+			if inst.Mode == 32 {
+				n = 16
+			}
+			numAddr++
+			if countPrefix(&inst, PrefixAddrSize) > numAddr {
+				n = inst.Mode
+			}
+			prefix += fmt.Sprintf("addr%d ", n)
+			continue
+
+		case PrefixData16, PrefixData32:
+			if implicitData && countPrefix(&inst, PrefixDataSize) > 1 {
+				// Similar to the addr32 logic above, but it only kicks in
+				// when something used the data size prefix (one is implicit).
+				n := 16
+				if inst.Mode == 16 {
+					n = 32
+				}
+				numData++
+				if countPrefix(&inst, PrefixDataSize) > numData {
+					if inst.Mode == 16 {
+						n = 16
+					} else {
+						n = 32
+					}
+				}
+				prefix += fmt.Sprintf("data%d ", n)
+				continue
+			}
+			prefix += strings.ToLower(p.String()) + " "
+		}
+	}
+
+	// Finally! Put it all together.
+	text := prefix + op
+	if args != nil {
+		text += " "
+		// Indirect call/jmp gets a star to distinguish from direct jump address.
+		if (inst.Op == CALL || inst.Op == JMP || inst.Op == LJMP || inst.Op == LCALL) && (isMem(inst.Args[0]) || isReg(inst.Args[0])) {
+			text += "*"
+		}
+		text += strings.Join(args, ",")
+	}
+	return text
+}
+
+// gnuArg returns the GNU syntax for the argument x from the instruction inst.
+// If *usedPrefixes is false and x is a Mem, then the formatting
+// includes any segment prefixes and sets *usedPrefixes to true.
+//
+// Segment prefixes that are folded into a memory operand are marked
+// PrefixImplicit in inst.Prefix, so gnuArg modifies *inst as a side effect.
+// A nil x is rendered as "<nil>".
+func gnuArg(inst *Inst, x Arg, usedPrefixes *bool) string {
+	if x == nil {
+		return "<nil>"
+	}
+	switch x := x.(type) {
+	case Reg:
+		switch inst.Op {
+		case CVTSI2SS, CVTSI2SD, CVTSS2SI, CVTSD2SI, CVTTSD2SI, CVTTSS2SI:
+			// With a 16-bit data size, show the 32-bit register as its
+			// 16-bit counterpart (same position in the 16-bit group).
+			if inst.DataSize == 16 && EAX <= x && x <= R15L {
+				x -= EAX - AX
+			}
+
+		case IN, INSB, INSW, INSD, OUT, OUTSB, OUTSW, OUTSD:
+			// DX is the port, but libopcodes prints it as if it were a memory reference.
+			if x == DX {
+				return "(%dx)"
+			}
+		}
+		return gccRegName[x]
+	case Mem:
+		seg := ""
+		// have* records which segment overrides apply to this operand,
+		// starting with the one stored in the Mem itself.
+		var haveCS, haveDS, haveES, haveFS, haveGS, haveSS bool
+		switch x.Segment {
+		case CS:
+			haveCS = true
+		case DS:
+			haveDS = true
+		case ES:
+			haveES = true
+		case FS:
+			haveFS = true
+		case GS:
+			haveGS = true
+		case SS:
+			haveSS = true
+		}
+		switch inst.Op {
+		case INSB, INSW, INSD, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ:
+			// These do not accept segment prefixes, at least in the GNU rendering.
+		default:
+			if *usedPrefixes {
+				break
+			}
+			// Walk the prefixes from last to first. The first occurrence
+			// found of each segment prefix not already recorded is folded
+			// into this operand and hidden from the prefix list.
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				p := inst.Prefix[i] &^ PrefixIgnored
+				if p == 0 {
+					continue
+				}
+				switch p {
+				case PrefixCS:
+					if !haveCS {
+						haveCS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixDS:
+					if !haveDS {
+						haveDS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixES:
+					if !haveES {
+						haveES = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixFS:
+					if !haveFS {
+						haveFS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixGS:
+					if !haveGS {
+						haveGS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixSS:
+					if !haveSS {
+						haveSS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				}
+			}
+			*usedPrefixes = true
+		}
+		// Emit the segment overrides in a fixed order: cs, ds, ss, es, fs, gs.
+		if haveCS {
+			seg += "%cs:"
+		}
+		if haveDS {
+			seg += "%ds:"
+		}
+		if haveSS {
+			seg += "%ss:"
+		}
+		if haveES {
+			seg += "%es:"
+		}
+		if haveFS {
+			seg += "%fs:"
+		}
+		if haveGS {
+			seg += "%gs:"
+		}
+		disp := ""
+		if x.Disp != 0 {
+			disp = fmt.Sprintf("%#x", x.Disp)
+		}
+		// Short form, without an index: used when there is no scaled index
+		// at all, or when the index is absent with scale 1 and the base is
+		// ESP/RSP (or there is no base in 64-bit mode).
+		if x.Scale == 0 || x.Index == 0 && x.Scale == 1 && (x.Base == ESP || x.Base == RSP || x.Base == 0 && inst.Mode == 64) {
+			if x.Base == 0 {
+				return seg + disp
+			}
+			return fmt.Sprintf("%s%s(%s)", seg, disp, gccRegName[x.Base])
+		}
+		base := gccRegName[x.Base]
+		if x.Base == 0 {
+			base = ""
+		}
+		index := gccRegName[x.Index]
+		if x.Index == 0 {
+			// Missing index register: print the %riz/%eiz placeholder.
+			if inst.AddrSize == 64 {
+				index = "%riz"
+			} else {
+				index = "%eiz"
+			}
+		}
+		if AX <= x.Base && x.Base <= DI {
+			// 16-bit addressing - no scale
+			return fmt.Sprintf("%s%s(%s,%s)", seg, disp, base, index)
+		}
+		return fmt.Sprintf("%s%s(%s,%s,%d)", seg, disp, base, index, x.Scale)
+	case Rel:
+		// Relative targets are printed as a signed hex offset from ".".
+		return fmt.Sprintf(".%+#x", int32(x))
+	case Imm:
+		// In 32-bit mode the immediate is shown as an unsigned 32-bit value.
+		if inst.Mode == 32 {
+			return fmt.Sprintf("$%#x", uint32(x))
+		}
+		return fmt.Sprintf("$%#x", int64(x))
+	}
+	return x.String()
+}
+
+// gccRegName gives the GNU (AT&T) spelling of each register, indexed by Reg.
+// gnuArg uses it to format register operands and the base and index
+// registers of memory operands. Every named Reg constant has an entry,
+// including DR8 through DR15, so that no register formats as an empty string.
+var gccRegName = [...]string{
+	0:    "REG0",
+	AL:   "%al",
+	CL:   "%cl",
+	BL:   "%bl",
+	DL:   "%dl",
+	AH:   "%ah",
+	CH:   "%ch",
+	BH:   "%bh",
+	DH:   "%dh",
+	SPB:  "%spl",
+	BPB:  "%bpl",
+	SIB:  "%sil",
+	DIB:  "%dil",
+	R8B:  "%r8b",
+	R9B:  "%r9b",
+	R10B: "%r10b",
+	R11B: "%r11b",
+	R12B: "%r12b",
+	R13B: "%r13b",
+	R14B: "%r14b",
+	R15B: "%r15b",
+	AX:   "%ax",
+	CX:   "%cx",
+	BX:   "%bx",
+	DX:   "%dx",
+	SP:   "%sp",
+	BP:   "%bp",
+	SI:   "%si",
+	DI:   "%di",
+	R8W:  "%r8w",
+	R9W:  "%r9w",
+	R10W: "%r10w",
+	R11W: "%r11w",
+	R12W: "%r12w",
+	R13W: "%r13w",
+	R14W: "%r14w",
+	R15W: "%r15w",
+	EAX:  "%eax",
+	ECX:  "%ecx",
+	EDX:  "%edx",
+	EBX:  "%ebx",
+	ESP:  "%esp",
+	EBP:  "%ebp",
+	ESI:  "%esi",
+	EDI:  "%edi",
+	R8L:  "%r8d",
+	R9L:  "%r9d",
+	R10L: "%r10d",
+	R11L: "%r11d",
+	R12L: "%r12d",
+	R13L: "%r13d",
+	R14L: "%r14d",
+	R15L: "%r15d",
+	RAX:  "%rax",
+	RCX:  "%rcx",
+	RDX:  "%rdx",
+	RBX:  "%rbx",
+	RSP:  "%rsp",
+	RBP:  "%rbp",
+	RSI:  "%rsi",
+	RDI:  "%rdi",
+	R8:   "%r8",
+	R9:   "%r9",
+	R10:  "%r10",
+	R11:  "%r11",
+	R12:  "%r12",
+	R13:  "%r13",
+	R14:  "%r14",
+	R15:  "%r15",
+	IP:   "%ip",
+	EIP:  "%eip",
+	RIP:  "%rip",
+	F0:   "%st",
+	F1:   "%st(1)",
+	F2:   "%st(2)",
+	F3:   "%st(3)",
+	F4:   "%st(4)",
+	F5:   "%st(5)",
+	F6:   "%st(6)",
+	F7:   "%st(7)",
+	M0:   "%mm0",
+	M1:   "%mm1",
+	M2:   "%mm2",
+	M3:   "%mm3",
+	M4:   "%mm4",
+	M5:   "%mm5",
+	M6:   "%mm6",
+	M7:   "%mm7",
+	X0:   "%xmm0",
+	X1:   "%xmm1",
+	X2:   "%xmm2",
+	X3:   "%xmm3",
+	X4:   "%xmm4",
+	X5:   "%xmm5",
+	X6:   "%xmm6",
+	X7:   "%xmm7",
+	X8:   "%xmm8",
+	X9:   "%xmm9",
+	X10:  "%xmm10",
+	X11:  "%xmm11",
+	X12:  "%xmm12",
+	X13:  "%xmm13",
+	X14:  "%xmm14",
+	X15:  "%xmm15",
+	CS:   "%cs",
+	SS:   "%ss",
+	DS:   "%ds",
+	ES:   "%es",
+	FS:   "%fs",
+	GS:   "%gs",
+	GDTR: "%gdtr",
+	IDTR: "%idtr",
+	LDTR: "%ldtr",
+	MSW:  "%msw",
+	TASK: "%task",
+	CR0:  "%cr0",
+	CR1:  "%cr1",
+	CR2:  "%cr2",
+	CR3:  "%cr3",
+	CR4:  "%cr4",
+	CR5:  "%cr5",
+	CR6:  "%cr6",
+	CR7:  "%cr7",
+	CR8:  "%cr8",
+	CR9:  "%cr9",
+	CR10: "%cr10",
+	CR11: "%cr11",
+	CR12: "%cr12",
+	CR13: "%cr13",
+	CR14: "%cr14",
+	CR15: "%cr15",
+	DR0:  "%db0",
+	DR1:  "%db1",
+	DR2:  "%db2",
+	DR3:  "%db3",
+	DR4:  "%db4",
+	DR5:  "%db5",
+	DR6:  "%db6",
+	DR7:  "%db7",
+	DR8:  "%db8",
+	DR9:  "%db9",
+	DR10: "%db10",
+	DR11: "%db11",
+	DR12: "%db12",
+	DR13: "%db13",
+	DR14: "%db14",
+	DR15: "%db15",
+	TR0:  "%tr0",
+	TR1:  "%tr1",
+	TR2:  "%tr2",
+	TR3:  "%tr3",
+	TR4:  "%tr4",
+	TR5:  "%tr5",
+	TR6:  "%tr6",
+	TR7:  "%tr7",
+}
+
+// gnuOp lists the opcodes whose GNU mnemonic is not simply the lowercased
+// Intel mnemonic. GNUSyntax consults it before any size suffix is added;
+// opcodes that are absent keep the lowercased name from Op.String.
+var gnuOp = map[Op]string{
+	CBW:       "cbtw",
+	CDQ:       "cltd",
+	CMPSD:     "cmpsl",
+	CMPSD_XMM: "cmpsd",
+	CWD:       "cwtd",
+	CWDE:      "cwtl",
+	CQO:       "cqto",
+	INSD:      "insl",
+	IRET:      "iretw",
+	IRETD:     "iret",
+	IRETQ:     "iretq",
+	LODSB:     "lods",
+	LODSD:     "lods",
+	LODSQ:     "lods",
+	LODSW:     "lods",
+	MOVSD:     "movsl",
+	MOVSD_XMM: "movsd",
+	OUTSD:     "outsl",
+	POPA:      "popaw",
+	POPAD:     "popa",
+	POPF:      "popfw",
+	POPFD:     "popf",
+	PUSHA:     "pushaw",
+	PUSHAD:    "pusha",
+	PUSHF:     "pushfw",
+	PUSHFD:    "pushf",
+	SCASB:     "scas",
+	SCASD:     "scas",
+	SCASQ:     "scas",
+	SCASW:     "scas",
+	STOSB:     "stos",
+	STOSD:     "stos",
+	STOSQ:     "stos",
+	STOSW:     "stos",
+	XLATB:     "xlat",
+}
+
+// cmppsOps holds the mnemonic stems for CMPPS, CMPPD, CMPSD_XMM and CMPSS,
+// indexed by the predicate immediate (0 through 7). GNUSyntax substitutes
+// the selected stem for the leading "cmp" of the mnemonic and drops the
+// immediate argument.
+var cmppsOps = []string{
+	"cmpeq",
+	"cmplt",
+	"cmple",
+	"cmpunord",
+	"cmpneq",
+	"cmpnlt",
+	"cmpnle",
+	"cmpord",
+}
+
+// pclmulqOps holds the mnemonics GNUSyntax uses for PCLMULQDQ when its
+// immediate uses only bits 0 and 4. The index is formed with bit 4 of the
+// immediate as the high bit and bit 0 as the low bit.
+var pclmulqOps = []string{
+	"pclmullqlqdq",
+	"pclmulhqlqdq",
+	"pclmullqhqdq",
+	"pclmulhqhqdq",
+}
+
+// countPrefix returns the number of slots in inst.Prefix whose low byte
+// equals the low byte of target. Metadata and distinguishing bits above the
+// low byte are ignored on both sides.
+func countPrefix(inst *Inst, target Prefix) int {
+	want := target & 0xFF
+	count := 0
+	for i := range inst.Prefix {
+		if inst.Prefix[i]&0xFF == want {
+			count++
+		}
+	}
+	return count
+}
+
+// markLastImplicit sets PrefixImplicit on the last slot of inst.Prefix whose
+// low byte equals prefix, and reports whether such a slot was found.
+func markLastImplicit(inst *Inst, prefix Prefix) bool {
+	i := len(inst.Prefix) - 1
+	for i >= 0 && inst.Prefix[i]&0xFF != prefix {
+		i--
+	}
+	if i < 0 {
+		return false
+	}
+	inst.Prefix[i] |= PrefixImplicit
+	return true
+}
+
+// unmarkImplicit clears PrefixImplicit on every slot of inst.Prefix whose
+// low byte equals prefix.
+func unmarkImplicit(inst *Inst, prefix Prefix) {
+	for i := range inst.Prefix {
+		if inst.Prefix[i]&0xFF != prefix {
+			continue
+		}
+		inst.Prefix[i] &^= PrefixImplicit
+	}
+}
+
+// byteSizeSuffix returns the GNU operand size suffix for an operand of b
+// bytes: "b", "w", "l" or "q" for 1, 2, 4 or 8 bytes, and "" for any other
+// size.
+func byteSizeSuffix(b int) string {
+	suffixes := [...]string{1: "b", 2: "w", 4: "l", 8: "q"}
+	if b < 0 || b >= len(suffixes) {
+		return ""
+	}
+	return suffixes[b]
+}
+
+// argBytes returns the size in bytes of arg: inst.MemBytes for a memory
+// operand, otherwise whatever regBytes reports for it.
+func argBytes(inst *Inst, arg Arg) int {
+	if !isMem(arg) {
+		return regBytes(arg)
+	}
+	return inst.MemBytes
+}
+
+// isFloat reports whether op is one of the x87 instructions for which
+// GNUSyntax picks a floating-point style size suffix.
+func isFloat(op Op) bool {
+	switch op {
+	case FADD, FCOM, FCOMP, FDIV, FDIVR, FLD, FMUL, FST, FSTP, FSUB, FSUBR:
+		// Operations on floating-point operands.
+		return true
+	case FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL, FIST, FISTP, FISTTP, FISUB, FISUBR:
+		// Operations on integer operands (the isFloatInt set).
+		return true
+	default:
+		return false
+	}
+}
+
+// isFloatInt reports whether op is one of the x87 instructions with an
+// integer operand (the FI* family).
+func isFloatInt(op Op) bool {
+	switch op {
+	case FILD, FIST, FISTP, FISTTP:
+		return true
+	case FIADD, FISUB, FISUBR, FIMUL, FIDIV, FIDIVR:
+		return true
+	case FICOM, FICOMP:
+		return true
+	default:
+		return false
+	}
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/inst.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/inst.go
@ -0,0 +1,641 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package x86asm implements decoding of x86 machine code.
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// An Inst is a single instruction.
+// It holds no pointers, so copying an Inst yields an independent value;
+// the syntax formatters rely on this to modify their copy freely.
+type Inst struct {
+	Prefix   Prefixes // Prefixes applied to the instruction.
+	Op       Op       // Opcode mnemonic
+	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
+	Args     Args     // Instruction arguments, in Intel order
+	Mode     int      // processor mode in bits: 16, 32, or 64
+	AddrSize int      // address size in bits: 16, 32, or 64
+	DataSize int      // operand size in bits: 16, 32, or 64
+	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
+	Len      int      // length of encoded instruction in bytes
+}
+
+// Prefixes is an array of prefixes associated with a single instruction.
+// The prefixes are listed in the same order as found in the instruction:
+// each prefix byte corresponds to one slot in the array. The first zero
+// in the array marks the end of the prefixes; code that walks the array
+// front to back stops there.
+type Prefixes [14]Prefix
+
+// A Prefix represents an Intel instruction prefix.
+// The low 8 bits are the actual prefix byte encoding,
+// and the top 8 bits contain distinguishing bits and metadata:
+// the metadata flags are PrefixImplicit, PrefixIgnored and PrefixInvalid,
+// and the remaining high bits tell apart different interpretations of the
+// same byte (for example PrefixREPN and PrefixXACQUIRE are both byte 0xF2).
+type Prefix uint16
+
+// Prefix values. A value above 0xFF refines the plain prefix that has the
+// same low byte, recording how that byte was interpreted.
+const (
+	// Metadata about the role of a prefix in an instruction.
+	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
+	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
+	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
+
+	// Memory segment overrides.
+	PrefixES Prefix = 0x26 // ES segment override
+	PrefixCS Prefix = 0x2E // CS segment override
+	PrefixSS Prefix = 0x36 // SS segment override
+	PrefixDS Prefix = 0x3E // DS segment override
+	PrefixFS Prefix = 0x64 // FS segment override
+	PrefixGS Prefix = 0x65 // GS segment override
+
+	// Branch prediction.
+	// These share their low byte with PrefixCS and PrefixDS respectively.
+	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
+	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
+
+	// Size attributes.
+	PrefixDataSize Prefix = 0x66 // operand size override
+	PrefixData16   Prefix = 0x166
+	PrefixData32   Prefix = 0x266
+	PrefixAddrSize Prefix = 0x67 // address size override
+	PrefixAddr16   Prefix = 0x167
+	PrefixAddr32   Prefix = 0x267
+
+	// One of a kind.
+	PrefixLOCK     Prefix = 0xF0 // lock
+	PrefixREPN     Prefix = 0xF2 // repeat not zero
+	PrefixXACQUIRE Prefix = 0x1F2
+	PrefixBND      Prefix = 0x2F2
+	PrefixREP      Prefix = 0xF3 // repeat
+	PrefixXRELEASE Prefix = 0x1F3
+
+	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
+	// The other bits are set or not according to the intended use.
+	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
+	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
+	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
+	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
+	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
+)
+
+// IsREX reports whether p is a REX prefix byte, that is, whether the high
+// nibble of its low byte equals PrefixREX.
+func (p Prefix) IsREX() bool {
+	highNibble := p & 0xF0
+	return highNibble == PrefixREX
+}
+
+// String returns the name of the prefix with its metadata flags ignored.
+// Names come from prefixNames; a REX prefix that has no entry there is
+// rendered as "REX." followed by the letters of its W, R, X and B bits,
+// and any other unknown value as "Prefix(0x...)".
+func (p Prefix) String() string {
+	// Drop the metadata flags so that only the prefix identity remains.
+	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
+
+	if name := prefixNames[p]; name != "" {
+		return name
+	}
+	if !p.IsREX() {
+		return fmt.Sprintf("Prefix(%#x)", int(p))
+	}
+
+	name := "REX."
+	for _, flag := range []struct {
+		bit    Prefix
+		letter string
+	}{
+		{PrefixREXW, "W"},
+		{PrefixREXR, "R"},
+		{PrefixREXX, "X"},
+		{PrefixREXB, "B"},
+	} {
+		if p&flag.bit != 0 {
+			name += flag.letter
+		}
+	}
+	return name
+}
+
+// An Op is an x86 opcode.
+type Op uint32
+
+// String returns the mnemonic recorded for op in opNames, or "Op(n)" when
+// op is out of range or has no name.
+func (op Op) String() string {
+	if n := int(op); n >= 0 && n < len(opNames) && opNames[n] != "" {
+		return opNames[n]
+	}
+	return fmt.Sprintf("Op(%d)", int(op))
+}
+
+// An Args holds the instruction arguments.
+// If an instruction has fewer than 4 arguments,
+// the final elements in the array are nil.
+// Code that walks the array stops at the first nil element.
+type Args [4]Arg
+
+// An Arg is a single instruction argument,
+// one of these types: Reg, Mem, Imm, Rel.
+// The unexported isArg method keeps types outside this package from
+// implementing Arg.
+type Arg interface {
+	String() string
+	isArg()
+}
+
+// Note that the implementations of Arg that follow are all sized
+// so that on a 64-bit machine the data can be inlined in
+// the interface value instead of requiring an allocation.
+
+// A Reg is a single register.
+// The zero Reg value has no name but indicates ``no register.''
+// Named registers are numbered consecutively from AL through regMax.
+type Reg uint8
+
+// Register numbers, grouped by register class.
+// The 16-, 32- and 64-bit general-purpose groups list their registers in the
+// same order, so a register can be converted to its counterpart of another
+// width by adding the difference between the group bases (for example
+// r - (EAX - AX)); the formatting code depends on this.
+const (
+	_ Reg = iota
+
+	// 8-bit
+	AL
+	CL
+	DL
+	BL
+	AH
+	CH
+	DH
+	BH
+	SPB
+	BPB
+	SIB
+	DIB
+	R8B
+	R9B
+	R10B
+	R11B
+	R12B
+	R13B
+	R14B
+	R15B
+
+	// 16-bit
+	AX
+	CX
+	DX
+	BX
+	SP
+	BP
+	SI
+	DI
+	R8W
+	R9W
+	R10W
+	R11W
+	R12W
+	R13W
+	R14W
+	R15W
+
+	// 32-bit
+	EAX
+	ECX
+	EDX
+	EBX
+	ESP
+	EBP
+	ESI
+	EDI
+	R8L
+	R9L
+	R10L
+	R11L
+	R12L
+	R13L
+	R14L
+	R15L
+
+	// 64-bit
+	RAX
+	RCX
+	RDX
+	RBX
+	RSP
+	RBP
+	RSI
+	RDI
+	R8
+	R9
+	R10
+	R11
+	R12
+	R13
+	R14
+	R15
+
+	// Instruction pointer.
+	IP  // 16-bit
+	EIP // 32-bit
+	RIP // 64-bit
+
+	// 387 floating point registers.
+	F0
+	F1
+	F2
+	F3
+	F4
+	F5
+	F6
+	F7
+
+	// MMX registers.
+	M0
+	M1
+	M2
+	M3
+	M4
+	M5
+	M6
+	M7
+
+	// XMM registers.
+	X0
+	X1
+	X2
+	X3
+	X4
+	X5
+	X6
+	X7
+	X8
+	X9
+	X10
+	X11
+	X12
+	X13
+	X14
+	X15
+
+	// Segment registers.
+	ES
+	CS
+	SS
+	DS
+	FS
+	GS
+
+	// System registers.
+	GDTR
+	IDTR
+	LDTR
+	MSW
+	TASK
+
+	// Control registers.
+	CR0
+	CR1
+	CR2
+	CR3
+	CR4
+	CR5
+	CR6
+	CR7
+	CR8
+	CR9
+	CR10
+	CR11
+	CR12
+	CR13
+	CR14
+	CR15
+
+	// Debug registers.
+	DR0
+	DR1
+	DR2
+	DR3
+	DR4
+	DR5
+	DR6
+	DR7
+	DR8
+	DR9
+	DR10
+	DR11
+	DR12
+	DR13
+	DR14
+	DR15
+
+	// Task registers.
+	TR0
+	TR1
+	TR2
+	TR3
+	TR4
+	TR5
+	TR6
+	TR7
+)
+
+// regMax is the largest named Reg value. It must be updated if registers
+// are added after TR7; TestRegString iterates from Reg(1) through regMax.
+const regMax = TR7
+
+// isArg marks Reg as an implementation of Arg.
+func (Reg) isArg() {}
+
+// String returns the name recorded for r in regNames, or "Reg(n)" when r is
+// out of range or has no name.
+func (r Reg) String() string {
+	if n := int(r); n >= 0 && n < len(regNames) && regNames[n] != "" {
+		return regNames[n]
+	}
+	return fmt.Sprintf("Reg(%d)", int(r))
+}
+
+// A Mem is a memory reference.
+// The general form is Segment:[Base+Scale*Index+Disp].
+// A zero Base means there is no base register, and a zero Scale means
+// there is no scaled index term.
+type Mem struct {
+	Segment Reg
+	Base    Reg
+	Scale   uint8
+	Index   Reg
+	Disp    int64
+}
+
+// isArg marks Mem as an implementation of Arg.
+func (Mem) isArg() {}
+
+// String formats m as "[base+scale*index+disp]", leaving out the parts that
+// are absent. The base is omitted when zero, the index term when Scale is
+// zero, and the "scale*" factor when Scale is 1. The displacement is printed
+// as a signed hex number when it is nonzero or when it is the only component.
+// The Segment field is not printed.
+func (m Mem) String() string {
+	hasBase := m.Base != 0
+	hasIndex := m.Scale != 0
+
+	var buf bytes.Buffer
+	buf.WriteString("[")
+	if hasBase {
+		buf.WriteString(m.Base.String())
+	}
+	if hasIndex {
+		if hasBase {
+			buf.WriteString("+")
+		}
+		if m.Scale > 1 {
+			fmt.Fprintf(&buf, "%d*", m.Scale)
+		}
+		buf.WriteString(m.Index.String())
+	}
+	if m.Disp != 0 || !hasBase && !hasIndex {
+		fmt.Fprintf(&buf, "%+#x", m.Disp)
+	}
+	buf.WriteString("]")
+	return buf.String()
+}
+
+// A Rel is an offset relative to the current instruction pointer.
+type Rel int32
+
+// isArg marks Rel as an implementation of Arg.
+func (Rel) isArg() {}
+
+// String formats r as "." followed by the explicitly signed decimal offset,
+// for example ".+5" or ".-3".
+func (r Rel) String() string {
+	offset := int32(r)
+	return "." + fmt.Sprintf("%+d", offset)
+}
+
+// An Imm is an integer constant.
+type Imm int64
+
+// isArg marks Imm as an implementation of Arg.
+func (Imm) isArg() {}
+
+// String formats i in hexadecimal with a 0x prefix; negative values get a
+// leading minus sign.
+func (i Imm) String() string {
+	// Convert to a plain int64 first: formatting an Imm with %x would
+	// call this String method again.
+	value := int64(i)
+	return fmt.Sprintf("%#x", value)
+}
+
+// String returns a generic rendering of the instruction: each prefix that is
+// not marked PrefixImplicit followed by a space, then the opcode name, then
+// the arguments in Intel order separated by ", ".
+func (i Inst) String() string {
+	var buf bytes.Buffer
+
+	// Prefixes end at the first zero slot.
+	for _, p := range i.Prefix {
+		if p == 0 {
+			break
+		}
+		if p&PrefixImplicit == 0 {
+			fmt.Fprintf(&buf, "%v ", p)
+		}
+	}
+
+	fmt.Fprintf(&buf, "%v", i.Op)
+
+	// Arguments end at the first nil slot.
+	for n, arg := range i.Args {
+		if arg == nil {
+			break
+		}
+		if n == 0 {
+			buf.WriteString(" ")
+		} else {
+			buf.WriteString(", ")
+		}
+		fmt.Fprintf(&buf, "%v", arg)
+	}
+	return buf.String()
+}
+
+func isReg(a Arg) bool {
+	_, ok := a.(Reg)
+	return ok
+}
+
+// isSegReg reports whether a is a segment register (ES through GS).
+func isSegReg(a Arg) bool {
+	if r, ok := a.(Reg); ok {
+		return r >= ES && r <= GS
+	}
+	return false
+}
+
+func isMem(a Arg) bool {
+	_, ok := a.(Mem)
+	return ok
+}
+
+func isImm(a Arg) bool {
+	_, ok := a.(Imm)
+	return ok
+}
+
+// regBytes returns the width in bytes of the general-purpose register a:
+// 1, 2, 4 or 8. It returns 0 if a is not a Reg or is a register outside the
+// four general-purpose groups.
+func regBytes(a Arg) int {
+	r, ok := a.(Reg)
+	if !ok {
+		return 0
+	}
+	switch {
+	case AL <= r && r <= R15B:
+		return 1
+	case AX <= r && r <= R15W:
+		return 2
+	case EAX <= r && r <= R15L:
+		return 4
+	case RAX <= r && r <= R15:
+		return 8
+	}
+	return 0
+}
+
+// isSegment reports whether p is exactly one of the six segment override
+// prefixes. A prefix that carries metadata or distinguishing bits does not
+// match.
+func isSegment(p Prefix) bool {
+	return p == PrefixCS ||
+		p == PrefixDS ||
+		p == PrefixES ||
+		p == PrefixFS ||
+		p == PrefixGS ||
+		p == PrefixSS
+}
+
+// The Op definitions and string list are in tables.go.
+
+// prefixNames maps prefix values (with metadata flags removed) to the names
+// returned by Prefix.String. REX prefixes other than the bare PrefixREX have
+// no entry here; Prefix.String builds their names from the flag bits.
+var prefixNames = map[Prefix]string{
+	PrefixCS:       "CS",
+	PrefixDS:       "DS",
+	PrefixES:       "ES",
+	PrefixFS:       "FS",
+	PrefixGS:       "GS",
+	PrefixSS:       "SS",
+	PrefixLOCK:     "LOCK",
+	PrefixREP:      "REP",
+	PrefixREPN:     "REPN",
+	PrefixAddrSize: "ADDRSIZE",
+	PrefixDataSize: "DATASIZE",
+	PrefixAddr16:   "ADDR16",
+	PrefixData16:   "DATA16",
+	PrefixAddr32:   "ADDR32",
+	PrefixData32:   "DATA32",
+	PrefixBND:      "BND",
+	PrefixXACQUIRE: "XACQUIRE",
+	PrefixXRELEASE: "XRELEASE",
+	PrefixREX:      "REX",
+	PrefixPT:       "PT",
+	PrefixPN:       "PN",
+}
+
+// regNames gives the name returned by Reg.String for each register,
+// indexed by Reg. Every register from Reg(1) through regMax must have an
+// entry; TestRegString checks this.
+var regNames = [...]string{
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SPB",
+	BPB:  "BPB",
+	SIB:  "SIB",
+	DIB:  "DIB",
+	R8B:  "R8B",
+	R9B:  "R9B",
+	R10B: "R10B",
+	R11B: "R11B",
+	R12B: "R12B",
+	R13B: "R13B",
+	R14B: "R14B",
+	R15B: "R15B",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8W",
+	R9W:  "R9W",
+	R10W: "R10W",
+	R11W: "R11W",
+	R12W: "R12W",
+	R13W: "R13W",
+	R14W: "R14W",
+	R15W: "R15W",
+	EAX:  "EAX",
+	ECX:  "ECX",
+	EDX:  "EDX",
+	EBX:  "EBX",
+	ESP:  "ESP",
+	EBP:  "EBP",
+	ESI:  "ESI",
+	EDI:  "EDI",
+	R8L:  "R8L",
+	R9L:  "R9L",
+	R10L: "R10L",
+	R11L: "R11L",
+	R12L: "R12L",
+	R13L: "R13L",
+	R14L: "R14L",
+	R15L: "R15L",
+	RAX:  "RAX",
+	RCX:  "RCX",
+	RDX:  "RDX",
+	RBX:  "RBX",
+	RSP:  "RSP",
+	RBP:  "RBP",
+	RSI:  "RSI",
+	RDI:  "RDI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "EIP",
+	RIP:  "RIP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/inst_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/inst_test.go
@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestRegString walks every register from Reg(1) through regMax and reports
+// an error if regNames has no entry for it or if its String method yields
+// text containing "Reg(".
+func TestRegString(t *testing.T) {
+	for reg := Reg(1); reg <= regMax; reg++ {
+		num := int(reg)
+		if regNames[reg] == "" {
+			t.Errorf("regNames[%d] is missing", num)
+			continue
+		}
+		name := reg.String()
+		if strings.Contains(name, "Reg(") {
+			t.Errorf("Reg(%d).String() = %s, want proper name", num, name)
+		}
+	}
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/intel.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/intel.go
@ -0,0 +1,518 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// IntelSyntax returns the Intel assembler syntax for the instruction, as defined by Intel's XED tool.
+//
+// The function works in phases: it first adjusts which prefixes are marked
+// PrefixImplicit or PrefixIgnored (such prefixes are not printed), then
+// builds the textual prefix string, then formats the arguments, and finally
+// applies per-opcode fixes to the argument list and mnemonic.
+// inst is passed by value, so the prefix edits below do not affect the caller.
+func IntelSyntax(inst Inst) string {
+	// Collect the leading non-nil arguments.
+	var iargs []Arg
+	for _, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		iargs = append(iargs, a)
+	}
+
+	// For these instructions (and for MOV only when its opcode bytes are
+	// 0F 20 through 0F 23) an address-size prefix is made visible again.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, LOOPNE, JCXZ, JECXZ, JRCXZ, LOOP, LOOPE, MOV, XLATB:
+		if inst.Op == MOV && (inst.Opcode>>16)&0xFFFC != 0x0F20 {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixAddrSize {
+				inst.Prefix[i] &^= PrefixImplicit
+			}
+		}
+	}
+
+	switch inst.Op {
+	case MOV:
+		// A move from a 32- or 64-bit general register into a segment
+		// register (ES through GS) is printed with the 16-bit source register.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= dst && dst <= GS && EAX <= src && src <= R15L {
+			src -= EAX - AX
+			iargs[1] = src
+		}
+		if ES <= dst && dst <= GS && RAX <= src && src <= R15 {
+			src -= RAX - AX
+			iargs[1] = src
+		}
+
+		// For first opcode byte A0 through A3, hide the address-size prefix.
+		if inst.Opcode>>24&^3 == 0xA0 {
+			for i, p := range inst.Prefix {
+				if p&0xFF == PrefixAddrSize {
+					inst.Prefix[i] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	switch inst.Op {
+	case AAM, AAD:
+		// Sign-extend the 8-bit immediate to the instruction's data size.
+		if imm, ok := iargs[0].(Imm); ok {
+			if inst.DataSize == 32 {
+				iargs[0] = Imm(uint32(int8(imm)))
+			} else if inst.DataSize == 16 {
+				iargs[0] = Imm(uint16(int8(imm)))
+			}
+		}
+
+	case PUSH:
+		// Truncate a pushed immediate to 32 bits.
+		if imm, ok := iargs[0].(Imm); ok {
+			iargs[0] = Imm(uint32(imm))
+		}
+	}
+
+	// If any occurrence of a prefix byte is implicit, mark every occurrence
+	// of that same byte implicit.
+	for _, p := range inst.Prefix {
+		if p&PrefixImplicit != 0 {
+			for j, pj := range inst.Prefix {
+				if pj&0xFF == p&0xFF {
+					inst.Prefix[j] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	// When an instruction was decoded, hide data-size prefixes, the
+	// CS/DS/ES/SS segment prefixes, and REX prefixes.
+	if inst.Op != 0 {
+		for i, p := range inst.Prefix {
+			switch p &^ PrefixIgnored {
+			case PrefixData16, PrefixData32, PrefixCS, PrefixDS, PrefixES, PrefixSS:
+				inst.Prefix[i] |= PrefixImplicit
+			}
+			if p.IsREX() {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// Hide PrefixPT and PrefixPN on loop and JCXZ-family instructions.
+	if isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		for i, p := range inst.Prefix {
+			if p == PrefixPT || p == PrefixPN {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// For the instructions listed here a data-size prefix is made visible
+	// again, except for NOP, RET, INT, and LRET encodings other than
+	// 90, C3, CC, and CB respectively.
+	switch inst.Op {
+	case AAA, AAS, CBW, CDQE, CLC, CLD, CLI, CLTS, CMC, CPUID, CQO, CWD, DAA, DAS,
+		FDECSTP, FINCSTP, FNCLEX, FNINIT, FNOP, FWAIT, HLT,
+		ICEBP, INSB, INSD, INSW, INT, INTO, INVD, IRET, IRETQ,
+		LAHF, LEAVE, LRET, MONITOR, MWAIT, NOP, OUTSB, OUTSD, OUTSW,
+		PAUSE, POPA, POPF, POPFQ, PUSHA, PUSHF, PUSHFQ,
+		RDMSR, RDPMC, RDTSC, RDTSCP, RET, RSM,
+		SAHF, STC, STD, STI, SYSENTER, SYSEXIT, SYSRET,
+		UD2, WBINVD, WRMSR, XEND, XLATB, XTEST:
+
+		if inst.Op == NOP && inst.Opcode>>24 != 0x90 {
+			break
+		}
+		if inst.Op == RET && inst.Opcode>>24 != 0xC3 {
+			break
+		}
+		if inst.Op == INT && inst.Opcode>>24 != 0xCC {
+			break
+		}
+		if inst.Op == LRET && inst.Opcode>>24 != 0xcb {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixDataSize {
+				inst.Prefix[i] &^= PrefixImplicit | PrefixIgnored
+			}
+		}
+
+	case 0:
+		// ok
+	}
+
+	// Drop arguments that are not printed for these instructions.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, MONITOR, MWAIT, XLATB:
+		iargs = nil
+
+	case STOSB, STOSW, STOSD, STOSQ:
+		iargs = iargs[:1]
+
+	case LODSB, LODSW, LODSD, LODSQ, SCASB, SCASW, SCASD, SCASQ:
+		iargs = iargs[1:]
+	}
+
+	// Bits recording which prefixes were seen; they are printed below in a
+	// fixed order regardless of their order in the encoding.
+	const (
+		haveData16 = 1 << iota
+		haveData32
+		haveAddr16
+		haveAddr32
+		haveXacquire
+		haveXrelease
+		haveLock
+		haveHintTaken
+		haveHintNotTaken
+		haveBnd
+	)
+	var prefixBits uint32
+	prefix := ""
+	for _, p := range inst.Prefix {
+		if p == 0 {
+			break
+		}
+		// An F3 prefix byte cancels any bnd prefix recorded so far.
+		if p&0xFF == 0xF3 {
+			prefixBits &^= haveBnd
+		}
+		if p&(PrefixImplicit|PrefixIgnored) != 0 {
+			continue
+		}
+		switch p {
+		default:
+			prefix += strings.ToLower(p.String()) + " "
+		case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+			// Segment prefixes are printed only when no instruction was decoded.
+			if inst.Op == 0 {
+				prefix += strings.ToLower(p.String()) + " "
+			}
+		case PrefixREPN:
+			prefix += "repne "
+		case PrefixLOCK:
+			prefixBits |= haveLock
+		case PrefixData16, PrefixDataSize:
+			prefixBits |= haveData16
+		case PrefixData32:
+			prefixBits |= haveData32
+		case PrefixAddrSize, PrefixAddr16:
+			prefixBits |= haveAddr16
+		case PrefixAddr32:
+			prefixBits |= haveAddr32
+		case PrefixXACQUIRE:
+			prefixBits |= haveXacquire
+		case PrefixXRELEASE:
+			prefixBits |= haveXrelease
+		case PrefixPT:
+			prefixBits |= haveHintTaken
+		case PrefixPN:
+			prefixBits |= haveHintNotTaken
+		case PrefixBND:
+			prefixBits |= haveBnd
+		}
+	}
+	// Some instructions never show certain prefixes.
+	switch inst.Op {
+	case JMP:
+		if inst.Opcode>>24 == 0xEB {
+			prefixBits &^= haveBnd
+		}
+	case RET, LRET:
+		prefixBits &^= haveData16 | haveData32
+	}
+
+	if prefixBits&haveXacquire != 0 {
+		prefix += "xacquire "
+	}
+	if prefixBits&haveXrelease != 0 {
+		prefix += "xrelease "
+	}
+	if prefixBits&haveLock != 0 {
+		prefix += "lock "
+	}
+	if prefixBits&haveBnd != 0 {
+		prefix += "bnd "
+	}
+	if prefixBits&haveHintTaken != 0 {
+		prefix += "hint-taken "
+	}
+	if prefixBits&haveHintNotTaken != 0 {
+		prefix += "hint-not-taken "
+	}
+	if prefixBits&haveAddr16 != 0 {
+		prefix += "addr16 "
+	}
+	if prefixBits&haveAddr32 != 0 {
+		prefix += "addr32 "
+	}
+	if prefixBits&haveData16 != 0 {
+		prefix += "data16 "
+	}
+	if prefixBits&haveData32 != 0 {
+		prefix += "data32 "
+	}
+
+	// No instruction: return just the prefixes, without the trailing space.
+	if inst.Op == 0 {
+		if prefix == "" {
+			return "<no instruction>"
+		}
+		return prefix[:len(prefix)-1]
+	}
+
+	var args []string
+	for _, a := range iargs {
+		if a == nil {
+			break
+		}
+		args = append(args, intelArg(&inst, a))
+	}
+
+	// Per-opcode adjustments to the formatted arguments and the mnemonic.
+	var op string
+	switch inst.Op {
+	case NOP:
+		// A NOP whose first opcode byte is 0F gets an extra ax/eax operand.
+		if inst.Opcode>>24 == 0x0F {
+			if inst.DataSize == 16 {
+				args = append(args, "ax")
+			} else {
+				args = append(args, "eax")
+			}
+		}
+
+	case BLENDVPD, BLENDVPS, PBLENDVB:
+		// Only the first two operands are printed.
+		args = args[:2]
+
+	case INT:
+		if inst.Opcode>>24 == 0xCC {
+			args = nil
+			op = "int3"
+		}
+
+	case LCALL, LJMP:
+		// Two-operand far calls and jumps print their operands swapped.
+		if len(args) == 2 {
+			args[0], args[1] = args[1], args[0]
+		}
+
+	// The x87 cases below add the st0/st1 operands that are printed
+	// explicitly in this syntax.
+	case FCHS, FABS, FTST, FLDPI, FLDL2E, FLDLG2, F2XM1, FXAM, FLD1, FLDL2T, FSQRT, FRNDINT, FCOS, FSIN:
+		if len(args) == 0 {
+			args = append(args, "st0")
+		}
+
+	case FPTAN, FSINCOS, FUCOMPP, FCOMPP, FYL2X, FPATAN, FXTRACT, FPREM1, FPREM, FYL2XP1, FSCALE:
+		if len(args) == 0 {
+			args = []string{"st0", "st1"}
+		}
+
+	case FST, FSTP, FISTTP, FIST, FISTP, FBSTP:
+		if len(args) == 1 {
+			args = append(args, "st0")
+		}
+
+	case FLD, FXCH, FCOM, FCOMP, FIADD, FIMUL, FICOM, FICOMP, FISUBR, FIDIV, FUCOM, FUCOMP, FILD, FBLD, FADD, FMUL, FSUB, FSUBR, FISUB, FDIV, FDIVR, FIDIVR:
+		if len(args) == 1 {
+			args = []string{"st0", args[0]}
+		}
+
+	case MASKMOVDQU, MASKMOVQ, XLATB, OUTSB, OUTSW, OUTSD:
+		// Scan the prefixes from last to first. The first CS/ES/FS/GS/SS
+		// prefix that applies (any of them outside 64-bit mode, only FS/GS
+		// in 64-bit mode) is appended as an extra operand; a DS prefix
+		// outside 64-bit mode ends the scan without adding anything.
+	FixSegment:
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			p := inst.Prefix[i] & 0xFF
+			switch p {
+			case PrefixCS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+				if inst.Mode != 64 || p == PrefixFS || p == PrefixGS {
+					args = append(args, strings.ToLower((inst.Prefix[i] & 0xFF).String()))
+					break FixSegment
+				}
+			case PrefixDS:
+				if inst.Mode != 64 {
+					break FixSegment
+				}
+			}
+		}
+	}
+
+	// Mnemonic: explicit override above, then intelOp, then the lower-cased Op name.
+	if op == "" {
+		op = intelOp[inst.Op]
+	}
+	if op == "" {
+		op = strings.ToLower(inst.Op.String())
+	}
+	if args != nil {
+		op += " " + strings.Join(args, ", ")
+	}
+	return prefix + op
+}
+
+// intelArg formats a single instruction argument in Intel syntax.
+//
+// Immediates print in hexadecimal, memory references print as
+// "<size> ptr [seg:][base+index*scale+disp]", relative targets print as
+// ".+0x..." or ".-0x...", and registers use the intelReg name when one is
+// defined. Anything else falls back to the lower-cased String form of arg.
+func intelArg(inst *Inst, arg Arg) string {
+	switch a := arg.(type) {
+	case Imm:
+		// In 32-bit mode immediates are truncated to 32 bits. Otherwise a
+		// value that fits in an int32 prints as signed and anything larger
+		// prints as an unsigned 64-bit value.
+		if inst.Mode == 32 {
+			return fmt.Sprintf("%#x", uint32(a))
+		}
+		if Imm(int32(a)) == a {
+			return fmt.Sprintf("%#x", int64(a))
+		}
+		return fmt.Sprintf("%#x", uint64(a))
+	case Mem:
+		// An EIP base is printed as RIP.
+		if a.Base == EIP {
+			a.Base = RIP
+		}
+		// Size keyword derived from the access width in bytes.
+		prefix := ""
+		switch inst.MemBytes {
+		case 1:
+			prefix = "byte "
+		case 2:
+			prefix = "word "
+		case 4:
+			prefix = "dword "
+		case 8:
+			prefix = "qword "
+		case 16:
+			prefix = "xmmword "
+		}
+		// Per-instruction overrides of the size keyword.
+		switch inst.Op {
+		case INVLPG:
+			prefix = "byte "
+		case STOSB, MOVSB, CMPSB, LODSB, SCASB:
+			prefix = "byte "
+		case STOSW, MOVSW, CMPSW, LODSW, SCASW:
+			prefix = "word "
+		case STOSD, MOVSD, CMPSD, LODSD, SCASD:
+			prefix = "dword "
+		case STOSQ, MOVSQ, CMPSQ, LODSQ, SCASQ:
+			prefix = "qword "
+		case LAR:
+			prefix = "word "
+		case BOUND:
+			if inst.Mode == 32 {
+				prefix = "qword "
+			} else {
+				prefix = "dword "
+			}
+		case PREFETCHW, PREFETCHNTA, PREFETCHT0, PREFETCHT1, PREFETCHT2, CLFLUSH:
+			prefix = "zmmword "
+		}
+		// Drop a segment that matches the one treated as the default here:
+		// ES for DI-based and DS for SI-based string operands, SS for
+		// SP/BP-based operands, and DS otherwise. LEA never prints a segment.
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, CMPSB, CMPSW, CMPSD, CMPSQ, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ, LODSB, LODSW, LODSD, LODSQ:
+			switch a.Base {
+			case DI, EDI, RDI:
+				if a.Segment == ES {
+					a.Segment = 0
+				}
+			case SI, ESI, RSI:
+				if a.Segment == DS {
+					a.Segment = 0
+				}
+			}
+		case LEA:
+			a.Segment = 0
+		default:
+			switch a.Base {
+			case SP, ESP, RSP, BP, EBP, RBP:
+				if a.Segment == SS {
+					a.Segment = 0
+				}
+			default:
+				if a.Segment == DS {
+					a.Segment = 0
+				}
+			}
+		}
+
+		// In 64-bit mode only FS and GS are ever printed.
+		if inst.Mode == 64 && a.Segment != FS && a.Segment != GS {
+			a.Segment = 0
+		}
+
+		prefix += "ptr "
+		if a.Segment != 0 {
+			prefix += strings.ToLower(a.Segment.String()) + ":"
+		}
+		prefix += "["
+		if a.Base != 0 {
+			prefix += intelArg(inst, a.Base)
+		}
+		if a.Scale != 0 && a.Index != 0 {
+			if a.Base != 0 {
+				prefix += "+"
+			}
+			prefix += fmt.Sprintf("%s*%d", intelArg(inst, a.Index), a.Scale)
+		}
+		if a.Disp != 0 {
+			// A displacement standing alone inside the brackets prints
+			// unsigned when it is non-negative or does not fit in an int32;
+			// in every other case it prints with an explicit sign.
+			if prefix[len(prefix)-1] == '[' && (a.Disp >= 0 || int64(int32(a.Disp)) != a.Disp) {
+				prefix += fmt.Sprintf("%#x", uint64(a.Disp))
+			} else {
+				prefix += fmt.Sprintf("%+#x", a.Disp)
+			}
+		}
+		prefix += "]"
+		return prefix
+	case Rel:
+		return fmt.Sprintf(".%+#x", int64(a))
+	case Reg:
+		// Use the Intel-specific name if there is one; otherwise fall
+		// through to the generic lower-cased name below.
+		if int(a) < len(intelReg) && intelReg[a] != "" {
+			return intelReg[a]
+		}
+	}
+	return strings.ToLower(arg.String())
+}
+
+// intelOp gives the mnemonic IntelSyntax prints for opcodes whose Intel
+// spelling is not simply the lower-cased Op name. Opcodes absent from the
+// map fall back to strings.ToLower(op.String()).
+var intelOp = map[Op]string{
+	JAE:       "jnb",
+	JA:        "jnbe",
+	JGE:       "jnl",
+	JNE:       "jnz",
+	JG:        "jnle",
+	JE:        "jz",
+	SETAE:     "setnb",
+	SETA:      "setnbe",
+	SETGE:     "setnl",
+	SETNE:     "setnz",
+	SETG:      "setnle",
+	SETE:      "setz",
+	CMOVAE:    "cmovnb",
+	CMOVA:     "cmovnbe",
+	CMOVGE:    "cmovnl",
+	CMOVNE:    "cmovnz",
+	CMOVG:     "cmovnle",
+	CMOVE:     "cmovz",
+	LCALL:     "call far",
+	LJMP:      "jmp far",
+	LRET:      "ret far",
+	ICEBP:     "int1",
+	MOVSD_XMM: "movsd",
+	XLATB:     "xlat",
+}
+
+// intelReg gives the name intelArg prints for registers whose Intel spelling
+// is not simply the lower-cased Reg name. It is indexed by Reg; registers
+// with an empty entry, or beyond the end of the array, use the lower-cased
+// String form instead.
+var intelReg = [...]string{
+	F0:  "st0",
+	F1:  "st1",
+	F2:  "st2",
+	F3:  "st3",
+	F4:  "st4",
+	F5:  "st5",
+	F6:  "st6",
+	F7:  "st7",
+	M0:  "mmx0",
+	M1:  "mmx1",
+	M2:  "mmx2",
+	M3:  "mmx3",
+	M4:  "mmx4",
+	M5:  "mmx5",
+	M6:  "mmx6",
+	M7:  "mmx7",
+	X0:  "xmm0",
+	X1:  "xmm1",
+	X2:  "xmm2",
+	X3:  "xmm3",
+	X4:  "xmm4",
+	X5:  "xmm5",
+	X6:  "xmm6",
+	X7:  "xmm7",
+	X8:  "xmm8",
+	X9:  "xmm9",
+	X10: "xmm10",
+	X11: "xmm11",
+	X12: "xmm12",
+	X13: "xmm13",
+	X14: "xmm14",
+	X15: "xmm15",
+
+	// TODO: Maybe the constants are named wrong.
+	SPB: "spl",
+	BPB: "bpl",
+	SIB: "sil",
+	DIB: "dil",
+
+	R8L:  "r8d",
+	R9L:  "r9d",
+	R10L: "r10d",
+	R11L: "r11d",
+	R12L: "r12d",
+	R13L: "r13d",
+	R14L: "r14d",
+	R15L: "r15d",
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/objdump_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/objdump_test.go
@ -0,0 +1,383 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+// 32-bit comparisons: each test feeds a different instruction generator
+// to testObjdump32.
+func TestObjdump32Manual(t *testing.T)   { testObjdump32(t, hexCases(t, objdumpManualTests)) }
+func TestObjdump32Testdata(t *testing.T) { testObjdump32(t, concat(basicPrefixes, testdataCases(t))) }
+func TestObjdump32ModRM(t *testing.T)    { testObjdump32(t, concat(basicPrefixes, enumModRM)) }
+func TestObjdump32OneByte(t *testing.T)  { testBasic(t, testObjdump32) }
+func TestObjdump320F(t *testing.T)       { testBasic(t, testObjdump32, 0x0F) }
+func TestObjdump320F38(t *testing.T)     { testBasic(t, testObjdump32, 0x0F, 0x38) }
+func TestObjdump320F3A(t *testing.T)     { testBasic(t, testObjdump32, 0x0F, 0x3A) }
+func TestObjdump32Prefix(t *testing.T)   { testPrefix(t, testObjdump32) }
+
+// 64-bit comparisons: the same generators, fed to testObjdump64.
+func TestObjdump64Manual(t *testing.T)   { testObjdump64(t, hexCases(t, objdumpManualTests)) }
+func TestObjdump64Testdata(t *testing.T) { testObjdump64(t, concat(basicPrefixes, testdataCases(t))) }
+func TestObjdump64ModRM(t *testing.T)    { testObjdump64(t, concat(basicPrefixes, enumModRM)) }
+func TestObjdump64OneByte(t *testing.T)  { testBasic(t, testObjdump64) }
+func TestObjdump640F(t *testing.T)       { testBasic(t, testObjdump64, 0x0F) }
+func TestObjdump640F38(t *testing.T)     { testBasic(t, testObjdump64, 0x0F, 0x38) }
+func TestObjdump640F3A(t *testing.T)     { testBasic(t, testObjdump64, 0x0F, 0x3A) }
+func TestObjdump64Prefix(t *testing.T)   { testPrefix(t, testObjdump64) }
+
+// 64-bit comparisons with rexPrefixes added to the generated input.
+// Only the Testdata variant filters its cases through isValidREX.
+func TestObjdump64REXTestdata(t *testing.T) {
+	testObjdump64(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
+}
+func TestObjdump64REXModRM(t *testing.T) {
+	testObjdump64(t, concat3(basicPrefixes, rexPrefixes, enumModRM))
+}
+func TestObjdump64REXOneByte(t *testing.T) { testBasicREX(t, testObjdump64) }
+func TestObjdump64REX0F(t *testing.T)      { testBasicREX(t, testObjdump64, 0x0F) }
+func TestObjdump64REX0F38(t *testing.T)    { testBasicREX(t, testObjdump64, 0x0F, 0x38) }
+func TestObjdump64REX0F3A(t *testing.T)    { testBasicREX(t, testObjdump64, 0x0F, 0x3A) }
+func TestObjdump64REXPrefix(t *testing.T)  { testPrefixREX(t, testObjdump64) }
+
+// objdumpManualTests holds test cases that will be run by TestObjdump32Manual
+// and TestObjdump64Manual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run='Objdump(32|64)Manual', particularly with tracing enabled.
+var objdumpManualTests = `
+F390
+`
+
+// allowedMismatchObjdump reports whether the mismatch between text and dec
+// should be allowed by the test.
+//
+// text, size, and inst describe our own decoding (its printed form, its
+// length in bytes, and the decoded instruction); dec holds what objdump
+// (libopcodes) produced for the same bytes: dec.text, dec.nenc, and dec.enc.
+// The checks run in order, and some of them first normalize text or dec.text
+// (both are local copies), so later checks see the normalized values.
+func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool {
+	// Both sides consumed 15 bytes: we report a truncated instruction
+	// where objdump prints (bad).
+	if size == 15 && dec.nenc == 15 && contains(text, "truncated") && contains(dec.text, "(bad)") {
+		return true
+	}
+
+	// We decoded a lone one-byte prefix, and the last word of objdump's
+	// text is also a prefix.
+	if i := strings.LastIndex(dec.text, " "); isPrefix(dec.text[i+1:]) && size == 1 && isPrefix(text) {
+		return true
+	}
+
+	// objdump prints a data32 prefix on movupd that we do not; accept if the
+	// texts agree once every "data32 " is removed.
+	if size == dec.nenc && contains(dec.text, "movupd") && contains(dec.text, "data32") {
+		s := strings.Replace(dec.text, "data32 ", "", -1)
+		if text == s {
+			return true
+		}
+	}
+
+	// Simplify our invalid instruction text.
+	if text == "error: unrecognized instruction" {
+		text = "BAD"
+	}
+
+	// Invalid instructions for which libopcodes prints %? register.
+	// FF E8 11 22 33 44:
+	// Invalid instructions for which libopcodes prints "internal disassembler error".
+	// Invalid instructions for which libopcodes prints 8087 only (e.g., DB E0)
+	// or prints 287 only (e.g., DB E4).
+	if contains(dec.text, "%?", "<internal disassembler error>", "(8087 only)", "(287 only)") {
+		dec.text = "(bad)"
+	}
+
+	// 0F 19 11, 0F 1C 11, 0F 1D 11, 0F 1E 11, 0F 1F 11: libopcodes says nop,
+	// but the Intel manuals say that the only NOP there is 0F 1F /0.
+	// Perhaps libopcodes is reporting an older encoding.
+	i := bytes.IndexByte(dec.enc[:], 0x0F)
+	if contains(dec.text, "nop") && i >= 0 && i+2 < len(dec.enc) && dec.enc[i+1]&^7 == 0x18 && (dec.enc[i+1] != 0x1F || (dec.enc[i+2]>>3)&7 != 0) {
+		dec.text = "(bad)"
+	}
+
+	// Any invalid instruction.
+	if text == "BAD" && contains(dec.text, "(bad)") {
+		return true
+	}
+
+	// Instructions libopcodes knows but we do not (e.g., 0F 19 11).
+	if (text == "BAD" || size == 1 && isPrefix(text)) && hasPrefix(dec.text, unsupported...) {
+		return true
+	}
+
+	// Instructions we know but libopcodes does not (e.g., 0F D0 11).
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && hasPrefix(text, libopcodesUnsupported...) {
+		return true
+	}
+
+	// Libopcodes rejects F2 90 as NOP. Not sure why.
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && inst.Opcode>>24 == 0x90 && countPrefix(inst, 0xF2) > 0 {
+		return true
+	}
+
+	// 0F 20 11, 0F 21 11, 0F 22 11, 0F 23 11, 0F 24 11:
+	// Moves into and out of some control registers seem to be unsupported by libopcodes.
+	// TODO(rsc): Are they invalid somehow?
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && contains(text, "%cr", "%db", "%tr") {
+		return true
+	}
+
+	// objdump printed a one-byte fwait for a byte other than 9B.
+	if contains(dec.text, "fwait") && dec.nenc == 1 && dec.enc[0] != 0x9B {
+		return true
+	}
+
+	// 9B D9 11: libopcodes reports FSTSW instead of FWAIT + FNSTSW.
+	// This is correct in that FSTSW is a pseudo-op for the pair, but it really
+	// is a pair of instructions: execution can stop between them.
+	// Our decoder chooses to separate them.
+	if (text == "fwait" || strings.HasSuffix(text, " fwait")) && dec.nenc >= len(strings.Fields(text)) && dec.enc[len(strings.Fields(text))-1] == 0x9B {
+		return true
+	}
+
+	// 0F 18 77 11:
+	// Invalid instructions for which libopcodes prints "nop/reserved".
+	// Perhaps libopcodes is reporting an older encoding.
+	if text == "BAD" && contains(dec.text, "nop/reserved") {
+		return true
+	}
+
+	// 0F C7 B0 11 22 33 44: libopcodes says vmptrld 0x44332211(%eax); we say rdrand %eax.
+	// TODO(rsc): Fix, since we are probably wrong, but we don't have vmptrld in the manual.
+	if contains(text, "rdrand") && contains(dec.text, "vmptrld", "vmxon", "vmclear") {
+		return true
+	}
+
+	// DD C8: libopcodes says FNOP but the Intel manual is clear FNOP is only D9 D0.
+	// Perhaps libopcodes is reporting an older encoding.
+	if text == "BAD" && contains(dec.text, "fnop") && (dec.enc[0] != 0xD9 || dec.enc[1] != 0xD0) {
+		return true
+	}
+
+	// 66 90: libopcodes says xchg %ax,%ax; we say 'data16 nop'.
+	// The 16-bit swap will preserve the high bits of the register,
+	// so they are the same.
+	if contains(text, "nop") && contains(dec.text, "xchg %ax,%ax") {
+		return true
+	}
+
+	// If there are multiple prefixes, allow libopcodes to use an alternate name.
+	if size == 1 && dec.nenc == 1 && prefixByte[text] > 0 && prefixByte[text] == prefixByte[dec.text] {
+		return true
+	}
+
+	// 26 9B: libopcodes reports "fwait"/1, ignoring segment prefix.
+	// https://sourceware.org/bugzilla/show_bug.cgi?id=16891
+	// F0 82: Decode="lock"/1 but libopcodes="lock (bad)"/2.
+	if size == 1 && dec.nenc >= 1 && prefixByte[text] == dec.enc[0] && contains(dec.text, "(bad)", "fwait", "fnop") {
+		return true
+	}
+
+	// libopcodes interprets 660f801122 as taking a rel16 but
+	// truncating the address at 16 bits. Not sure what is correct.
+	if contains(text, ".+0x2211", ".+0x11") && contains(dec.text, " .-") {
+		return true
+	}
+
+	// 66 F3 0F D6 C5, 66 F2 0F D6 C0: libopcodes reports use of XMM register instead of MMX register,
+	// but only when the instruction has a 66 prefix. Maybe they know something we don't.
+	if countPrefix(inst, 0x66) > 0 && contains(dec.text, "movdq2q", "movq2dq") && !contains(dec.text, "%mm") {
+		return true
+	}
+
+	// 0F 01 F8, 0F 05, 0F 07: these are 64-bit instructions but libopcodes accepts them.
+	if (text == "BAD" || size == 1 && isPrefix(text)) && contains(dec.text, "swapgs", "syscall", "sysret", "rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase") {
+		return true
+	}
+
+	return false
+}
+
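+// Instructions known to libopcodes (or xed) but not to us.
+// Most of these come from supplementary manuals of one form or another.
+// The entries are passed to hasPrefix against the other disassembler's text,
+// so a short entry such as "vadd" is presumably meant to cover a whole
+// family of mnemonics.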
+var unsupported = strings.Fields(`
+	bndc
+	bndl
+	bndm
+	bnds
+	clac
+	clgi
+	femms
+	fldln
+	fldz
+	getsec
+	invlpga
+	kmov
+	montmul
+	pavg
+	pf2i
+	pfacc
+	pfadd
+	pfcmp
+	pfmax
+	pfmin
+	pfmul
+	pfna
+	pfpnac
+	pfrc
+	pfrs
+	pfsub
+	phadd
+	phsub
+	pi2f
+	pmulhr
+	prefetch
+	pswap
+	ptest
+	rdseed
+	sha1
+	sha256
+	skinit
+	stac
+	stgi
+	vadd
+	vand
+	vcmp
+	vcomis
+	vcvt
+	vcvt
+	vdiv
+	vhadd
+	vhsub
+	vld
+	vmax
+	vmcall
+	vmfunc
+	vmin
+	vmlaunch
+	vmload
+	vmmcall
+	vmov
+	vmov
+	vmov
+	vmptrld
+	vmptrst
+	vmread
+	vmresume
+	vmrun
+	vmsave
+	vmul
+	vmwrite
+	vmxoff
+	vor
+	vpack
+	vpadd
+	vpand
+	vpavg
+	vpcmp
+	vpcmp
+	vpins
+	vpmadd
+	vpmax
+	vpmin
+	vpmul
+	vpmul
+	vpor
+	vpsad
+	vpshuf
+	vpsll
+	vpsra
+	vpsrad
+	vpsrl
+	vpsub
+	vpunp
+	vpxor
+	vrcp
+	vrsqrt
+	vshuf
+	vsqrt
+	vsub
+	vucomis
+	vunp
+	vxor
+	vzero
+	xcrypt
+	xsha1
+	xsha256
+	xstore-rng
+	insertq
+	extrq
+	vmclear
+	invvpid
+	adox
+	vmxon
+	invept
+	adcx
+	vmclear
+	prefetchwt1
+	enclu
+	encls
+	salc
+	fstpnce
+	fdisi8087_nop
+	fsetpm287_nop
+	feni8087_nop
+	syscall
+	sysret
+`)
+
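+// Instructions known to us but not to libopcodes (at least in binutils 2.24).
+// The entries are passed to hasPrefix against our own text, so a short entry
+// such as "pmul" is presumably meant to cover a whole family of mnemonics.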
+var libopcodesUnsupported = strings.Fields(`
+	addsubps
+	aes
+	blend
+	cvttpd2dq
+	dpp
+	extract
+	haddps
+	hsubps
+	insert
+	invpcid
+	lddqu
+	movmsk
+	movnt
+	movq2dq
+	mps
+	pack
+	pblend
+	pclmul
+	pcmp
+	pext
+	phmin
+	pins
+	pmax
+	pmin
+	pmov
+	pmovmsk
+	pmul
+	popcnt
+	pslld
+	psllq
+	psllw
+	psrad
+	psraw
+	psrl
+	ptest
+	punpck
+	round
+	xrstor
+	xsavec
+	xsaves
+	comis
+	ucomis
+	movhps
+	movntps
+	rsqrt
+	rcpp
+	puncpck
+	bsf
+	movq2dq
+	cvttpd2dq
+	movq
+	hsubpd
+	movdqa
+	movhpd
+	addsubpd
+	movd
+	haddpd
+	cvtps2dq
+	bsr
+	cvtdq2ps
+	rdrand
+	maskmov
+	movq2dq
+	movlhps
+	movbe
+	movlpd
+`)
--- a/src/cmd/internal/rsc.io/x86/x86asm/objdumpext_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/objdumpext_test.go
@ -0,0 +1,314 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"debug/elf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// objdumpPath is the objdump binary these tests run.
+// Apologies for the machine-specific path, but we need objdump 2.24 + some committed patches that will land in 2.25.
+const objdumpPath = "/Users/rsc/bin/objdump2"
+
+// testObjdump32 runs the objdump comparison in 32-bit mode over the
+// instruction encodings produced by generate.
+func testObjdump32(t *testing.T, generate func(func([]byte))) {
+	testObjdumpArch(t, generate, 32)
+}
+
+// testObjdump64 runs the objdump comparison in 64-bit mode over the
+// instruction encodings produced by generate.
+func testObjdump64(t *testing.T, generate func(func([]byte))) {
+	testObjdumpArch(t, generate, 64)
+}
+
+// testObjdumpArch compares our decoder against objdump for the given arch
+// (32 or 64) over the instruction encodings produced by generate, using
+// allowedMismatchObjdump to excuse known differences.
+//
+// The test is skipped in short mode and also when objdumpPath does not
+// exist: the required objdump build lives at a machine-specific path, so its
+// absence says nothing about the decoder and must not fail the test run.
+func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch int) {
+	if testing.Short() {
+		t.Skip("skipping objdump test in short mode")
+	}
+
+	if _, err := os.Stat(objdumpPath); err != nil {
+		t.Skip(err)
+	}
+
+	testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump)
+}
+
+// objdump disassembles ext.File with the external objdump binary and sends
+// the results on ext.Dec.
+//
+// It first writes an ELF header (32- or 64-bit according to ext.Arch) at the
+// front of the file, then runs "objdumpPath -d -z" on it and parses the
+// output line by line. An ExtInst is sent only for an instruction whose
+// address equals the next expected address, which begins at start and
+// advances by 32 after each send. An error is returned if objdump's
+// addresses get ahead of the expected one, if the final expected address is
+// not start+ext.Size, or if running or waiting for the command fails.
+func objdump(ext *ExtDis) error {
+	// File already written with instructions; add ELF header.
+	if ext.Arch == 32 {
+		if err := writeELF32(ext.File, ext.Size); err != nil {
+			return err
+		}
+	} else {
+		if err := writeELF64(ext.File, ext.Size); err != nil {
+			return err
+		}
+	}
+
+	b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name())
+	if err != nil {
+		return err
+	}
+
+	var (
+		nmatch  int
+		reading bool
+		next    uint32 = start
+		addr    uint32
+		encbuf  [32]byte
+		enc     []byte
+		text    string
+	)
+	// flush sends the pending instruction (addr, enc, text) if it starts at
+	// the next expected address, after normalizing objdump's spelling to ours.
+	flush := func() {
+		if addr == next {
+			// objdump says repz/repnz; we say rep/repn.
+			switch text {
+			case "repz":
+				text = "rep"
+			case "repnz":
+				text = "repn"
+			default:
+				text = strings.Replace(text, "repz ", "rep ", -1)
+				text = strings.Replace(text, "repnz ", "repn ", -1)
+			}
+			// Rewrite a hexadecimal target matched by pcrelw or pcrel as an
+			// offset from the end of the instruction, computed in 16 bits
+			// for pcrelw and in 32 bits for pcrel.
+			if m := pcrelw.FindStringSubmatch(text); m != nil {
+				targ, _ := strconv.ParseUint(m[2], 16, 64)
+				text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
+			}
+			if m := pcrel.FindStringSubmatch(text); m != nil {
+				targ, _ := strconv.ParseUint(m[2], 16, 64)
+				text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+			}
+			text = strings.Replace(text, "0x0(", "(", -1)
+			text = strings.Replace(text, "%st(0)", "%st", -1)
+
+			ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+			encbuf = [32]byte{}
+			enc = nil
+			next += 32
+		}
+	}
+	var textangle = []byte("<.text>:")
+	for {
+		line, err := b.ReadSlice('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("reading objdump output: %v", err)
+		}
+		// Skip everything up to and including the "<.text>:" line.
+		if bytes.Contains(line, textangle) {
+			reading = true
+			continue
+		}
+		if !reading {
+			continue
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		// A continuation line only extends the encoding of the pending
+		// instruction; it does not start a new one.
+		if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil {
+			enc = enc1
+			continue
+		}
+		// A new instruction begins: emit the previous one, then parse this line.
+		flush()
+		nmatch++
+		addr, enc, text = parseLine(line, encbuf[:0])
+		if addr > next {
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
+		}
+	}
+	// Emit the final pending instruction.
+	flush()
+	if next != start+uint32(ext.Size) {
+		return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+
+	return nil
+}
+
+func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	oline := line
+	i := index(line, ":\t")
+	if i < 0 {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32)
+	if err != nil {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	addr = uint32(x)
+	line = line[i+2:]
+	i = bytes.IndexByte(line, '\t')
+	if i < 0 {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	enc, ok := parseHex(line[:i], encstart)
+	if !ok {
+		log.Fatalf("cannot parse disassembly: %q", oline)
+	}
+	line = trimSpace(line[i:])
+	if i := bytes.IndexByte(line, '#'); i >= 0 {
+		line = trimSpace(line[:i])
+	}
+	text = string(fixSpace(line))
+	return
+}
+
+// parseContinuation parses the hex bytes that follow the ":\t" separator on
+// line, using enc as the starting slice for parseHex, and returns the slice
+// that parseHex produces. It returns nil if the separator is missing.
+// The success flag from parseHex is deliberately ignored: the caller only
+// tests the returned slice against nil (presumably parseHex returns nil for
+// input that is not pure hex; confirm against its definition).
+func parseContinuation(line []byte, enc []byte) []byte {
+	colon := index(line, ":\t")
+	if colon < 0 {
+		return nil
+	}
+	out, _ := parseHex(line[colon+1:], enc)
+	return out
+}
+
+// writeELF32 writes an ELF32 header to the file,
+// describing a text segment that starts at start
+// and extends for size bytes.
+//
+// The header, one program header, three section headers (null, .text,
+// .shstrtab), and the section-name string table are written at offset 0 of
+// f. Any error from seeking or writing f is returned.
+func writeELF32(f *os.File, size int) error {
+	if _, err := f.Seek(0, 0); err != nil {
+		return err
+	}
+	var hdr elf.Header32
+	var prog elf.Prog32
+	var sect elf.Section32
+	var buf bytes.Buffer
+	// Encode zero values once to learn the encoded size of each structure.
+	// The binary.Write errors are not checked here or below: the targets are
+	// fixed-size structs and the destination is an in-memory buffer.
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	off1 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	off2 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	off3 := buf.Len()
+	buf.Reset()
+	data := byte(elf.ELFDATA2LSB)
+	hdr = elf.Header32{
+		Ident:     [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1},
+		Type:      2,
+		Machine:   uint16(elf.EM_386),
+		Version:   1,
+		Entry:     start,
+		Phoff:     uint32(off1),
+		Shoff:     uint32(off2),
+		Flags:     0x05000002,
+		Ehsize:    uint16(off1),
+		Phentsize: uint16(off2 - off1),
+		Phnum:     1,
+		Shentsize: uint16(off3 - off2),
+		Shnum:     3,
+		Shstrndx:  2,
+	}
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	prog = elf.Prog32{
+		Type:   1,
+		Off:    start,
+		Vaddr:  start,
+		Paddr:  start,
+		Filesz: uint32(size),
+		Memsz:  uint32(size),
+		Flags:  5,
+		Align:  start,
+	}
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
+	sect = elf.Section32{
+		Name:      1,
+		Type:      uint32(elf.SHT_PROGBITS),
+		Addr:      start,
+		Off:       start,
+		Size:      uint32(size),
+		Flags:     uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
+		Addralign: 4,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .text
+	sect = elf.Section32{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint32(off2 + (off3-off2)*3),
+		Size:      uint32(len("\x00.text\x00.shstrtab\x00")),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .shstrtab
+	buf.WriteString("\x00.text\x00.shstrtab\x00")
+	// Report a failed or short write instead of pretending the header exists.
+	_, err := f.Write(buf.Bytes())
+	return err
+}
+
+// writeELF64 writes an ELF64 header to the file,
+// describing a text segment that starts at start
+// and extends for size bytes.
+//
+// The header, one program header, three section headers (null, .text,
+// .shstrtab), and the section-name string table are written at offset 0 of
+// f. Any error from seeking or writing f is returned.
+func writeELF64(f *os.File, size int) error {
+	if _, err := f.Seek(0, 0); err != nil {
+		return err
+	}
+	var hdr elf.Header64
+	var prog elf.Prog64
+	var sect elf.Section64
+	var buf bytes.Buffer
+	// Encode zero values once to learn the encoded size of each structure.
+	// The binary.Write errors are not checked here or below: the targets are
+	// fixed-size structs and the destination is an in-memory buffer.
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	off1 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	off2 := buf.Len()
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	off3 := buf.Len()
+	buf.Reset()
+	data := byte(elf.ELFDATA2LSB)
+	hdr = elf.Header64{
+		Ident:     [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1},
+		Type:      2,
+		Machine:   uint16(elf.EM_X86_64),
+		Version:   1,
+		Entry:     start,
+		Phoff:     uint64(off1),
+		Shoff:     uint64(off2),
+		Flags:     0x05000002,
+		Ehsize:    uint16(off1),
+		Phentsize: uint16(off2 - off1),
+		Phnum:     1,
+		Shentsize: uint16(off3 - off2),
+		Shnum:     3,
+		Shstrndx:  2,
+	}
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	prog = elf.Prog64{
+		Type:   1,
+		Off:    start,
+		Vaddr:  start,
+		Paddr:  start,
+		Filesz: uint64(size),
+		Memsz:  uint64(size),
+		Flags:  5,
+		Align:  start,
+	}
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
+	sect = elf.Section64{
+		Name:      1,
+		Type:      uint32(elf.SHT_PROGBITS),
+		Addr:      start,
+		Off:       start,
+		Size:      uint64(size),
+		Flags:     uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
+		Addralign: 4,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .text
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*3),
+		Size:      uint64(len("\x00.text\x00.shstrtab\x00")),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .shstrtab
+	buf.WriteString("\x00.text\x00.shstrtab\x00")
+	// Report a failed or short write instead of pretending the header exists.
+	_, err := f.Write(buf.Bytes())
+	return err
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/plan9ext_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/plan9ext_test.go
@ -0,0 +1,120 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"testing"
+)
+
+// plan9Path is the relative path of the external disassembler binary that
+// the plan9 helper runs to produce reference output.
+const plan9Path = "testdata/libmach8db"
+
+// testPlan9Arch compares our decoder against the libmach8db disassembler for
+// the given arch (32 or 64) over the instruction encodings produced by
+// generate, using allowedMismatchPlan9 to excuse known differences.
+//
+// The test is skipped in short mode and also when plan9Path does not exist:
+// the reference binary is an external tool, so its absence says nothing
+// about the decoder and must not fail the test run.
+func testPlan9Arch(t *testing.T, arch int, generate func(func([]byte))) {
+	if testing.Short() {
+		t.Skip("skipping libmach test in short mode")
+	}
+
+	if _, err := os.Stat(plan9Path); err != nil {
+		t.Skip(err)
+	}
+
+	testExtDis(t, "plan9", arch, plan9, generate, allowedMismatchPlan9)
+}
+
+// testPlan932 runs the libmach8db comparison in 32-bit mode over the
+// instruction encodings produced by generate.
+func testPlan932(t *testing.T, generate func(func([]byte))) {
+	testPlan9Arch(t, 32, generate)
+}
+
+// testPlan964 runs the libmach8db comparison in 64-bit mode over the
+// instruction encodings produced by generate.
+func testPlan964(t *testing.T, generate func(func([]byte))) {
+	testPlan9Arch(t, 64, generate)
+}
+
+// plan9 disassembles ext.File with the external libmach8db binary and sends
+// the results on ext.Dec.
+//
+// The binary is run with -8 for 32-bit input and -6 for 64-bit input. An
+// ExtInst is sent only for an output line whose address equals the next
+// expected address, which begins at start and advances by 32 after each
+// send; lines with a lower address are ignored. An error is returned if an
+// address gets ahead of the expected one, if the final expected address is
+// not start+ext.Size, or if running or waiting for the command fails.
+func plan9(ext *ExtDis) error {
+	flag := "-8"
+	if ext.Arch == 64 {
+		flag = "-6"
+	}
+	b, err := ext.Run(plan9Path, flag, ext.File.Name())
+	if err != nil {
+		return err
+	}
+
+	nmatch := 0
+	next := uint32(start)
+	var (
+		addr   uint32
+		encbuf [32]byte
+		enc    []byte
+		text   string
+	)
+
+	for {
+		line, err := b.ReadSlice('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("reading libmach8db output: %v", err)
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		nmatch++
+		addr, enc, text = parseLinePlan9(line, encbuf[:0])
+		if addr > next {
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
+		}
+		// Not at the next expected address yet; skip this line.
+		if addr < next {
+			continue
+		}
+		// Rewrite a hexadecimal target matched by pcrelw or pcrel as an
+		// offset from the end of the instruction, computed in 16 bits for
+		// pcrelw and in 32 bits for pcrel.
+		if m := pcrelw.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
+		}
+		if m := pcrel.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+		}
+		ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+		encbuf = [32]byte{}
+		enc = nil
+		next += 32
+	}
+	if next != start+uint32(ext.Size) {
+		return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+
+	return nil
+}
+
+func parseLinePlan9(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	i := bytes.IndexByte(line, ' ')
+	if i < 0 || line[0] != '0' || line[1] != 'x' {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	j := bytes.IndexByte(line[i+1:], ' ')
+	if j < 0 {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	j += i + 1
+	x, err := strconv.ParseUint(string(trimSpace(line[2:i])), 16, 32)
+	if err != nil {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	addr = uint32(x)
+	enc, ok := parseHex(line[i+1:j], encstart)
+	if !ok {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	return addr, enc, string(fixSpace(line[j+1:]))
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/plan9x.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/plan9x.go
@ -0,0 +1,346 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Plan9Syntax formats inst in Go assembler syntax, which was originally
+// defined by Plan 9.
+// The pc argument is the program counter of the instruction; it is used to
+// turn PC-relative operands into absolute addresses.
+// The symname function looks up an address in the symbol table of the
+// program being disassembled. It returns the name and base address of the
+// symbol containing the address, or "", 0 if there is none.
+// A nil symname is treated as a table with no symbols.
+func Plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string {
+	if symname == nil {
+		symname = func(uint64) (string, uint64) { return "", 0 }
+	}
+
+	// Operands are emitted in the reverse of inst.Args order; nil slots are skipped.
+	var operands []string
+	for n := len(inst.Args); n > 0; n-- {
+		if arg := inst.Args[n-1]; arg != nil {
+			operands = append(operands, plan9Arg(&inst, pc, symname, arg))
+		}
+	}
+
+	// Only the last prefix seen before the first zero or REX entry is considered.
+	var final Prefix
+	for _, p := range inst.Prefix {
+		if p == 0 || p.IsREX() {
+			break
+		}
+		final = p
+	}
+
+	var text string
+	switch final & 0xFF {
+	case 0, 0x66, 0x67:
+		// No prefix, or a 0x66/0x67 prefix: nothing is printed.
+	case PrefixREPN:
+		text = "REPNE "
+	default:
+		text = final.String() + " "
+	}
+
+	text += inst.Op.String()
+	if plan9Suffix[inst.Op] {
+		// Opcodes listed in plan9Suffix get a size letter chosen by DataSize.
+		switch inst.DataSize {
+		case 8:
+			text += "B"
+		case 16:
+			text += "W"
+		case 32:
+			text += "L"
+		case 64:
+			text += "Q"
+		}
+	}
+
+	if len(operands) > 0 {
+		text += " " + strings.Join(operands, ", ")
+	}
+	return text
+}
+
+// plan9Arg formats a single operand of inst in Plan 9 syntax.
+// Register names come from plan9Reg. Immediates and absolute memory
+// references that symname can resolve are printed symbolically as
+// name(SB) or name+off(SB). Relative operands are resolved against pc
+// unless pc is 0. Operands that get no special treatment are formatted by
+// their own String method.
+func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
+	// symref renders addr as "name(SB)" or "name+off(SB)" when symname
+	// knows a symbol for it, and reports whether it did.
+	symref := func(addr uint64) (string, bool) {
+		name, base := symname(addr)
+		if name == "" {
+			return "", false
+		}
+		if addr == base {
+			return name + "(SB)", true
+		}
+		return fmt.Sprintf("%s%+d(SB)", name, addr-base), true
+	}
+
+	switch a := arg.(type) {
+	case Reg:
+		return plan9Reg[a]
+
+	case Rel:
+		if pc != 0 {
+			// Name the target only when it is exactly the start of a symbol.
+			// Anywhere else, print the raw address: JMP 0x123 is easier to
+			// search for than JMP f+10(SB), which needs mental arithmetic.
+			target := pc + uint64(inst.Len) + uint64(a)
+			if name, base := symname(target); name != "" && target == base {
+				return fmt.Sprintf("%s(SB)", name)
+			}
+			return fmt.Sprintf("%#x", target)
+		}
+
+	case Imm:
+		if ref, ok := symref(uint64(a)); ok {
+			return "$" + ref
+		}
+		switch {
+		case inst.Mode == 32:
+			return fmt.Sprintf("$%#x", uint32(a))
+		case Imm(int32(a)) == a:
+			// The value survives a round trip through int32: print it signed.
+			return fmt.Sprintf("$%#x", int64(a))
+		default:
+			return fmt.Sprintf("$%#x", uint64(a))
+		}
+
+	case Mem:
+		// A bare non-zero displacement (no segment, base, or scaled index)
+		// is an absolute address and may name a symbol.
+		scaledIndex := a.Index != 0 && a.Scale != 0
+		if a.Segment == 0 && a.Base == 0 && !scaledIndex && a.Disp != 0 {
+			if ref, ok := symref(uint64(a.Disp)); ok {
+				return ref
+			}
+		}
+
+		var seg string
+		if a.Segment != 0 {
+			seg = plan9Reg[a.Segment] + ":"
+		}
+		disp := "0"
+		if a.Disp != 0 {
+			disp = fmt.Sprintf("%#x", a.Disp)
+		}
+		out := seg + disp
+		if a.Base != 0 {
+			out += "(" + plan9Reg[a.Base] + ")"
+		}
+		if scaledIndex {
+			out += fmt.Sprintf("(%s*%d)", plan9Reg[a.Index], a.Scale)
+		}
+		return out
+	}
+	return arg.String()
+}
+
+// plan9Suffix is indexed by Op and marks the opcodes whose Plan 9 mnemonic
+// carries an operand-size letter. Plan9Syntax appends B, W, L, or Q
+// (for a DataSize of 8, 16, 32, or 64) to every opcode set to true here.
+var plan9Suffix = [maxOp + 1]bool{
+	ADC:       true,
+	ADD:       true,
+	AND:       true,
+	BSF:       true,
+	BSR:       true,
+	BT:        true,
+	BTC:       true,
+	BTR:       true,
+	BTS:       true,
+	CMP:       true,
+	CMPXCHG:   true,
+	CVTSI2SD:  true,
+	CVTSI2SS:  true,
+	CVTSD2SI:  true,
+	CVTSS2SI:  true,
+	CVTTSD2SI: true,
+	CVTTSS2SI: true,
+	DEC:       true,
+	DIV:       true,
+	FLDENV:    true,
+	FRSTOR:    true,
+	IDIV:      true,
+	IMUL:      true,
+	IN:        true,
+	INC:       true,
+	LEA:       true,
+	MOV:       true,
+	MOVNTI:    true,
+	MUL:       true,
+	NEG:       true,
+	NOP:       true,
+	NOT:       true,
+	OR:        true,
+	OUT:       true,
+	POP:       true,
+	POPA:      true,
+	PUSH:      true,
+	PUSHA:     true,
+	RCL:       true,
+	RCR:       true,
+	ROL:       true,
+	ROR:       true,
+	SAR:       true,
+	SBB:       true,
+	SHL:       true,
+	SHLD:      true,
+	SHR:       true,
+	SHRD:      true,
+	SUB:       true,
+	TEST:      true,
+	XADD:      true,
+	XCHG:      true,
+	XOR:       true,
+}
+
+// plan9Reg is indexed by Reg and gives the name printed for that register
+// in Plan 9 syntax. The general-purpose registers share one name across
+// widths (for example AX, EAX, and RAX all print as "AX"), except that
+// AL through DH keep their own byte-register names.
+// plan9Arg uses it for register operands and for the segment, base, and
+// index registers of memory operands.
+var plan9Reg = [...]string{
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SP",
+	BPB:  "BP",
+	SIB:  "SI",
+	DIB:  "DI",
+	R8B:  "R8",
+	R9B:  "R9",
+	R10B: "R10",
+	R11B: "R11",
+	R12B: "R12",
+	R13B: "R13",
+	R14B: "R14",
+	R15B: "R15",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8",
+	R9W:  "R9",
+	R10W: "R10",
+	R11W: "R11",
+	R12W: "R12",
+	R13W: "R13",
+	R14W: "R14",
+	R15W: "R15",
+	EAX:  "AX",
+	ECX:  "CX",
+	EDX:  "DX",
+	EBX:  "BX",
+	ESP:  "SP",
+	EBP:  "BP",
+	ESI:  "SI",
+	EDI:  "DI",
+	R8L:  "R8",
+	R9L:  "R9",
+	R10L: "R10",
+	R11L: "R11",
+	R12L: "R12",
+	R13L: "R13",
+	R14L: "R14",
+	R15L: "R15",
+	RAX:  "AX",
+	RCX:  "CX",
+	RDX:  "DX",
+	RBX:  "BX",
+	RSP:  "SP",
+	RBP:  "BP",
+	RSI:  "SI",
+	RDI:  "DI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "IP",
+	RIP:  "IP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
--- a/src/cmd/internal/rsc.io/x86/x86asm/plan9x_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/plan9x_test.go
@ -0,0 +1,54 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"strings"
+	"testing"
+)
+
+// Test entry points that drive testPlan932 (defined elsewhere in this
+// package), one per instruction generator: the manual list, the testdata
+// cases, the ModRM enumeration, each opcode map, and the prefix combinations.
+func TestPlan932Manual(t *testing.T)   { testPlan932(t, hexCases(t, plan9ManualTests)) }
+func TestPlan932Testdata(t *testing.T) { testPlan932(t, concat(basicPrefixes, testdataCases(t))) }
+func TestPlan932ModRM(t *testing.T)    { testPlan932(t, concat(basicPrefixes, enumModRM)) }
+func TestPlan932OneByte(t *testing.T)  { testBasic(t, testPlan932) }
+func TestPlan9320F(t *testing.T)       { testBasic(t, testPlan932, 0x0F) }
+func TestPlan9320F38(t *testing.T)     { testBasic(t, testPlan932, 0x0F, 0x38) }
+func TestPlan9320F3A(t *testing.T)     { testBasic(t, testPlan932, 0x0F, 0x3A) }
+func TestPlan932Prefix(t *testing.T)   { testPrefix(t, testPlan932) }
+
+// The same generators, driving testPlan964 instead.
+func TestPlan964Manual(t *testing.T)   { testPlan964(t, hexCases(t, plan9ManualTests)) }
+func TestPlan964Testdata(t *testing.T) { testPlan964(t, concat(basicPrefixes, testdataCases(t))) }
+func TestPlan964ModRM(t *testing.T)    { testPlan964(t, concat(basicPrefixes, enumModRM)) }
+func TestPlan964OneByte(t *testing.T)  { testBasic(t, testPlan964) }
+func TestPlan9640F(t *testing.T)       { testBasic(t, testPlan964, 0x0F) }
+func TestPlan9640F38(t *testing.T)     { testBasic(t, testPlan964, 0x0F, 0x38) }
+func TestPlan9640F3A(t *testing.T)     { testBasic(t, testPlan964, 0x0F, 0x3A) }
+func TestPlan964Prefix(t *testing.T)   { testPrefix(t, testPlan964) }
+
+// testPlan964 again, with rexPrefixes added to the generated input.
+// The testdata variant keeps only the cases accepted by isValidREX.
+func TestPlan964REXTestdata(t *testing.T) {
+	testPlan964(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
+}
+func TestPlan964REXModRM(t *testing.T)   { testPlan964(t, concat3(basicPrefixes, rexPrefixes, enumModRM)) }
+func TestPlan964REXOneByte(t *testing.T) { testBasicREX(t, testPlan964) }
+func TestPlan964REX0F(t *testing.T)      { testBasicREX(t, testPlan964, 0x0F) }
+func TestPlan964REX0F38(t *testing.T)    { testBasicREX(t, testPlan964, 0x0F, 0x38) }
+func TestPlan964REX0F3A(t *testing.T)    { testBasicREX(t, testPlan964, 0x0F, 0x3A) }
+func TestPlan964REXPrefix(t *testing.T)  { testPrefixREX(t, testPlan964) }
+
+// plan9ManualTests holds test cases that will be run by TestPlan932Manual and TestPlan964Manual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run='Plan9.*Manual', particularly with tracing enabled.
+var plan9ManualTests = `
+`
+
+// allowedMismatchPlan9 reports whether the mismatch between text and dec
+// should be allowed by the test.
+// No mismatches are currently excused, so it always returns false.
+func allowedMismatchPlan9(text string, size int, inst *Inst, dec ExtInst) bool {
+	return false
+}
+
+// Instructions known to us but not to plan9.
+// The list is whitespace-separated, one mnemonic per field; it is currently empty.
+var plan9Unsupported = strings.Fields(`
+`)
--- a/src/cmd/internal/rsc.io/x86/x86asm/tables.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/tables.go
--- a/src/cmd/internal/rsc.io/x86/x86asm/testdata/Makefile
+++ b/src/cmd/internal/rsc.io/x86/x86asm/testdata/Makefile
@ -0,0 +1,12 @@
+# Build the libmach8db helper from libmach8db.c using 9c and 9l.
+# The intermediate object file is removed whether or not the build succeeded.
+libmach8db: libmach8db.c
+	9c libmach8db.c && 9l -o libmach8db libmach8db.o; rm libmach8db.o
+
+# Regenerate newdecode.txt. The Objdump, Xed, and Plan9 tests are run from
+# the parent directory in 32- and 64-bit flavors with -printtests, and their
+# output is accumulated in ../log (the first run truncates it, later runs
+# append). The tab-delimited gnu, intel, and plan9 lines are then extracted
+# from the log and sorted.
+newdecode.txt:
+	cd ..; go test -cover -run 'Objdump.*32' -v -timeout 10h -printtests 2>&1 | tee log
+	cd ..; go test -cover -run 'Objdump.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
+	cd ..; go test -cover -run 'Xed.*32' -v -timeout 10h -printtests 2>&1 | tee -a log
+	cd ..; go test -cover -run 'Xed.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
+	cd ..; go test -cover -run 'Plan9.*32' -v -timeout 10h -printtests 2>&1 | tee -a log
+	cd ..; go test -cover -run 'Plan9.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
+	egrep '	(gnu|intel|plan9)	' ../log |sort >newdecode.txt
+
--- a/src/cmd/internal/rsc.io/x86/x86asm/testdata/decode.txt
+++ b/src/cmd/internal/rsc.io/x86/x86asm/testdata/decode.txt
--- a/src/cmd/internal/rsc.io/x86/x86asm/xed_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/xed_test.go
@ -0,0 +1,211 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+// Test entry points that drive testXed32, one per instruction generator:
+// the manual list, the testdata cases, the ModRM enumeration, each opcode
+// map, and the prefix combinations.
+func TestXed32Manual(t *testing.T)   { testXed32(t, hexCases(t, xedManualTests)) }
+func TestXed32Testdata(t *testing.T) { testXed32(t, concat(basicPrefixes, testdataCases(t))) }
+func TestXed32ModRM(t *testing.T)    { testXed32(t, concat(basicPrefixes, enumModRM)) }
+func TestXed32OneByte(t *testing.T)  { testBasic(t, testXed32) }
+func TestXed320F(t *testing.T)       { testBasic(t, testXed32, 0x0F) }
+func TestXed320F38(t *testing.T)     { testBasic(t, testXed32, 0x0F, 0x38) }
+func TestXed320F3A(t *testing.T)     { testBasic(t, testXed32, 0x0F, 0x3A) }
+func TestXed32Prefix(t *testing.T)   { testPrefix(t, testXed32) }
+
+// The same generators, driving testXed64 instead.
+func TestXed64Manual(t *testing.T)   { testXed64(t, hexCases(t, xedManualTests)) }
+func TestXed64Testdata(t *testing.T) { testXed64(t, concat(basicPrefixes, testdataCases(t))) }
+func TestXed64ModRM(t *testing.T)    { testXed64(t, concat(basicPrefixes, enumModRM)) }
+func TestXed64OneByte(t *testing.T)  { testBasic(t, testXed64) }
+func TestXed640F(t *testing.T)       { testBasic(t, testXed64, 0x0F) }
+func TestXed640F38(t *testing.T)     { testBasic(t, testXed64, 0x0F, 0x38) }
+func TestXed640F3A(t *testing.T)     { testBasic(t, testXed64, 0x0F, 0x3A) }
+func TestXed64Prefix(t *testing.T)   { testPrefix(t, testXed64) }
+
+// testXed64 again, with rexPrefixes added to the generated input.
+// The testdata variant keeps only the cases accepted by isValidREX.
+func TestXed64REXTestdata(t *testing.T) {
+	testXed64(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
+}
+func TestXed64REXModRM(t *testing.T)   { testXed64(t, concat3(basicPrefixes, rexPrefixes, enumModRM)) }
+func TestXed64REXOneByte(t *testing.T) { testBasicREX(t, testXed64) }
+func TestXed64REX0F(t *testing.T)      { testBasicREX(t, testXed64, 0x0F) }
+func TestXed64REX0F38(t *testing.T)    { testBasicREX(t, testXed64, 0x0F, 0x38) }
+func TestXed64REX0F3A(t *testing.T)    { testBasicREX(t, testXed64, 0x0F, 0x3A) }
+func TestXed64REXPrefix(t *testing.T)  { testPrefixREX(t, testXed64) }
+
+// xedManualTests holds test cases that will be run by TestXed32Manual and TestXed64Manual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run='Xed.*Manual', particularly with tracing enabled.
+var xedManualTests = `
+6690
+`
+
+// allowedMismatchXed reports whether a disagreement between our decoding
+// (text, size, inst) and xed's decoding (dec) is one of the known,
+// tolerated differences listed below.
+func allowedMismatchXed(text string, size int, inst *Inst, dec ExtInst) bool {
+	// undecoded holds when our text contains "error:", or when isPrefix
+	// accepts text and the decoded size is a single byte.
+	undecoded := contains(text, "error:") || isPrefix(text) && size == 1
+
+	switch {
+	case undecoded && contains(dec.text, "GENERAL_ERROR", "INSTR_TOO_LONG", "BAD_LOCK_PREFIX"):
+		return true
+
+	case contains(dec.text, "BAD_LOCK_PREFIX") && countExactPrefix(inst, PrefixLOCK|PrefixInvalid) > 0:
+		return true
+
+	case contains(dec.text, "BAD_LOCK_PREFIX", "GENERAL_ERROR") && countExactPrefix(inst, PrefixLOCK|PrefixImplicit) > 0:
+		return true
+
+	case text == "lock" && size == 1 && contains(dec.text, "BAD_LOCK_PREFIX"):
+		return true
+
+	// Instructions not known to us.
+	case undecoded && contains(dec.text, unsupported...):
+		return true
+
+	// Instructions not known to xed.
+	case contains(text, xedUnsupported...) && contains(dec.text, "ERROR"):
+		return true
+
+	case undecoded && contains(dec.text, "shl ") && (inst.Opcode>>16)&0xEC38 == 0xC030:
+		return true
+
+	// 82 11 22: xed says 'adc byte ptr [ecx], 0x22' but there is no justification in the manuals for that.
+	// C0 30 11: xed says 'shl byte ptr [eax], 0x11' but there is no justification in the manuals for that.
+	// F6 08 11: xed says 'test byte ptr [eax], 0x11' but there is no justification in the manuals for that.
+	case undecoded && hasByte(dec.enc[:dec.nenc], 0x82, 0xC0, 0xC1, 0xD0, 0xD1, 0xD2, 0xD3, 0xF6, 0xF7):
+		return true
+
+	// F3 11 22 and many others: xed allows and drops misused rep/repn prefix.
+	case (text == "rep" && dec.enc[0] == 0xF3 || (text == "repn" || text == "repne") && dec.enc[0] == 0xF2) && (!contains(dec.text, "ins", "outs", "movs", "lods", "cmps", "scas") || contains(dec.text, "xmm")):
+		return true
+
+	// 0F C7 30: xed says vmptrld qword ptr [eax]; we say rdrand eax.
+	// TODO(rsc): Fix, since we are probably wrong, but we don't have vmptrld in the manual.
+	case contains(text, "rdrand") && contains(dec.text, "vmptrld", "vmxon", "vmclear"):
+		return true
+
+	// F3 0F AE 00: we say 'rdfsbase dword ptr [eax]' but RDFSBASE needs a register.
+	// Also, this is a 64-bit only instruction.
+	// TODO(rsc): Fix to reject this encoding.
+	case contains(text, "rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase") && contains(dec.text, "ERROR"):
+		return true
+
+	// 0F 01 F8: we say swapgs but that's only valid in 64-bit mode.
+	// TODO(rsc): Fix.
+	case contains(text, "swapgs"):
+		return true
+
+	// 0F 24 11: 'mov ecx, tr2' except there is no TR2.
+	// Or maybe the MOV to TR registers doesn't use RMF.
+	case contains(text, "cr1", "cr5", "cr6", "cr7", "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7") && contains(dec.text, "ERROR"):
+		return true
+
+	// 0F 19 11, 0F 1C 11, 0F 1D 11, 0F 1E 11, 0F 1F 11: xed says nop,
+	// but the Intel manuals say that the only NOP there is 0F 1F /0.
+	// Perhaps xed is reporting an older encoding.
+	case undecoded && contains(dec.text, "nop ") && (inst.Opcode>>8)&0xFFFF38 != 0x0F1F00:
+		return true
+
+	// 66 0F AE 38: clflushopt but we only know clflush
+	case contains(text, "clflush") && contains(dec.text, "clflushopt"):
+		return true
+
+	// 0F 20 04 11: MOV SP, CR0 but has mod!=3 despite register argument.
+	// (This encoding ignores the mod bits.) The decoder sees the non-register
+	// mod and reads farther ahead to decode the memory reference that
+	// isn't really there, causing the size to be too large.
+	// TODO(rsc): Fix.
+	case text == dec.text && size > dec.nenc && contains(text, " cr", " dr", " tr"):
+		return true
+
+	// 0F AE E9: xed says lfence, which is wrong (only 0F AE E8 is lfence). And so on.
+	case contains(dec.text, "fence") && hasByte(dec.enc[:dec.nenc], 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF):
+		return true
+
+	// DD C9, DF C9: xed says 'fxch st0, st1' but that instruction is D9 C9.
+	case undecoded && contains(dec.text, "fxch ") && hasByte(dec.enc[:dec.nenc], 0xDD, 0xDF):
+		return true
+
+	// DC D4: xed says 'fcom st0, st4' but that instruction is D8 D4.
+	case undecoded && contains(dec.text, "fcom ") && hasByte(dec.enc[:dec.nenc], 0xD8, 0xDC):
+		return true
+
+	// DE D4: xed says 'fcomp st0, st4' but that instruction is D8 D4.
+	case undecoded && contains(dec.text, "fcomp ") && hasByte(dec.enc[:dec.nenc], 0xDC, 0xDE):
+		return true
+
+	// DF D4: xed says 'fstp st4, st0' but that instruction is DD D4.
+	case undecoded && contains(dec.text, "fstp ") && hasByte(dec.enc[:dec.nenc], 0xDF):
+		return true
+	}
+
+	return false
+}
+
+// countExactPrefix returns how many entries of inst.Prefix are exactly
+// equal to target, flag bits included.
+func countExactPrefix(inst *Inst, target Prefix) int {
+	count := 0
+	for i := range inst.Prefix {
+		if inst.Prefix[i] == target {
+			count++
+		}
+	}
+	return count
+}
+
+// hasByte reports whether src contains at least one of the target bytes.
+// It returns false when src or target is empty.
+func hasByte(src []byte, target ...byte) bool {
+	for i := range target {
+		if bytes.Contains(src, target[i:i+1]) {
+			return true
+		}
+	}
+	return false
+}
+
+// Instructions known to us but not to xed.
+// The list is whitespace-separated, one mnemonic per field. allowedMismatchXed
+// passes it to contains to excuse cases where xed reports an error for an
+// instruction we decode. Each mnemonic is listed once.
+var xedUnsupported = strings.Fields(`
+	xrstor
+	xsave
+	ud1
+	xgetbv
+	xsetbv
+	fxsave
+	fxrstor
+	clflush
+	lfence
+	mfence
+	sfence
+	rsqrtps
+	rcpps
+	emms
+	ldmxcsr
+	stmxcsr
+	movhpd
+	movnti
+	rdrand
+	movbe
+	movlpd
+	sysret
+`)
--- a/src/cmd/internal/rsc.io/x86/x86asm/xedext_test.go
+++ b/src/cmd/internal/rsc.io/x86/x86asm/xedext_test.go
@ -0,0 +1,206 @@
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// xed binary from Intel sde-external-6.22.0-2014-03-06.
+// NOTE(review): this is an absolute path on one developer's machine; the
+// xed tests can only run where a binary exists at exactly this location.
+const xedPath = "/Users/rsc/bin/xed"
+
+// testXedArch compares our decoder against xed for the given arch (32 or 64
+// in the visible callers), over the instruction encodings produced by generate.
+// The test is skipped in short mode and when no xed binary exists at xedPath.
+// xedPath is a fixed local path, so a missing binary means the tool is not
+// installed here, not that the decoder is broken.
+func testXedArch(t *testing.T, arch int, generate func(func([]byte))) {
+	if testing.Short() {
+		t.Skip("skipping xed test in short mode")
+	}
+
+	if _, err := os.Stat(xedPath); err != nil {
+		t.Skip(err)
+	}
+
+	testExtDis(t, "intel", arch, xed, generate, allowedMismatchXed)
+}
+
+// testXed32 runs testXedArch with arch 32.
+func testXed32(t *testing.T, generate func(func([]byte))) {
+	testXedArch(t, 32, generate)
+}
+
+// testXed64 runs testXedArch with arch 64.
+func testXed64(t *testing.T, generate func(func([]byte))) {
+	testXedArch(t, 64, generate)
+}
+
+// xed runs the xed binary at xedPath over ext.File and translates its output
+// into ExtInst values sent on ext.Dec.
+//
+// Output lines are parsed with parseLineXed. A line is reported only when its
+// address equals the next expected address, which begins at start and
+// advances by 32 after every instruction sent. Lines at lower addresses are
+// ignored, and a line at a higher address is an error. Parsing stops at the
+// "# end of text section" or "# Errors" marker, but the remaining output is
+// still read to the end.
+//
+// It returns an error if xed cannot be run, if its output cannot be read, if
+// the addresses fall out of step, if fewer than ext.Size bytes of input were
+// accounted for, or if ext.Wait fails.
+func xed(ext *ExtDis) error {
+	b, err := ext.Run(xedPath, fmt.Sprintf("-%d", ext.Arch), "-n", "1G", "-ir", ext.File.Name())
+	if err != nil {
+		return err
+	}
+
+	next := uint32(start)
+	var (
+		addr   uint32
+		encbuf [32]byte
+		enc    []byte
+		text   string
+	)
+
+	var xedEnd = []byte("# end of text section")
+	var xedEnd1 = []byte("# Errors")
+
+	eof := false
+	for {
+		line, err := b.ReadSlice('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("reading xed output: %v", err)
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		if bytes.HasPrefix(line, xedEnd) || bytes.HasPrefix(line, xedEnd1) {
+			eof = true
+		}
+		if eof {
+			// Keep draining the output, but stop interpreting it.
+			continue
+		}
+		// enc aliases encbuf, so the bytes travel in the ExtInst sent below.
+		addr, enc, text = parseLineXed(line, encbuf[:0])
+		if addr > next {
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
+		}
+		if addr < next {
+			continue
+		}
+		// xed prints repz/repnz where our decoder's text has rep/repn.
+		switch text {
+		case "repz":
+			text = "rep"
+		case "repnz":
+			text = "repn"
+		default:
+			text = strings.Replace(text, "repz ", "rep ", -1)
+			text = strings.Replace(text, "repnz ", "repn ", -1)
+		}
+		// Rewrite branch targets as offsets relative to the end of the
+		// instruction: 16-bit for pcrelw matches, 32-bit for pcrel matches.
+		// The ParseUint errors are ignored.
+		// NOTE(review): presumably the patterns only capture hex digits - confirm.
+		if m := pcrelw.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
+		}
+		if m := pcrel.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+		}
+		ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+		encbuf = [32]byte{}
+		enc = nil
+		next += 32
+	}
+	// Compute the expected end once so the check and the message agree.
+	end := start + uint32(ext.Size)
+	if next != end {
+		return fmt.Errorf("not enough results found [%d %d]", next, end)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+
+	return nil
+}
+
+// Markers that parseLineXed looks for in xed output.
+var (
+	xedInRaw    = []byte("In raw...")                      // line prefix; such lines yield zero values
+	xedDots     = []byte("...")                            // line prefix; such lines yield zero values
+	xdis        = []byte("XDIS ")                          // prefix required on every disassembly line
+	xedError    = []byte("ERROR: ")                        // prefix of an error line
+	xedNoDecode = []byte("Could not decode at offset: 0x") // precedes the hex offset in an error line
+)
+
+// parseLineXed extracts the address, encoding bytes, and text from one line
+// of xed output.
+//
+// Lines starting with "In raw..." or "..." yield the zero values.
+// Lines starting with "ERROR: " yield the offset that follows
+// "Could not decode at offset: 0x" as addr, a nil enc, and the rest of the
+// line after "ERROR: " as text.
+// All other lines must start with "XDIS " and are read as
+// "XDIS <addr>: <class> <set> <hex> <text>". The hex field is handed to
+// parseHex together with encstart, and the text is passed through fixSpace.
+// A line that fits none of these shapes terminates the program via log.Fatalf.
+func parseLineXed(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	if bytes.HasPrefix(line, xedInRaw) || bytes.HasPrefix(line, xedDots) {
+		return 0, nil, ""
+	}
+
+	if bytes.HasPrefix(line, xedError) {
+		msg := line[len(xedError):]
+		if bytes.IndexByte(msg, ' ') < 0 {
+			log.Fatalf("cannot parse error: %q", line)
+		}
+		at := bytes.Index(line, xedNoDecode)
+		if at < 0 {
+			log.Fatalf("cannot parse error: %q", line)
+		}
+		// The offset runs from just past the marker up to the next space.
+		offset := line[at+len(xedNoDecode):]
+		n := bytes.IndexByte(offset, ' ')
+		if n < 0 {
+			log.Fatalf("cannot parse error: %q", line)
+		}
+		v, err := strconv.ParseUint(string(trimSpace(offset[:n])), 16, 32)
+		if err != nil {
+			log.Fatalf("cannot parse disassembly: %q", line)
+		}
+		return uint32(v), nil, string(msg)
+	}
+
+	if !bytes.HasPrefix(line, xdis) {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	colon := bytes.IndexByte(line, ':')
+	if colon < 0 {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	v, err := strconv.ParseUint(string(trimSpace(line[len(xdis):colon])), 16, 32)
+	if err != nil {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+	addr = uint32(v)
+
+	// advance moves pos forward while the byte under it is a space
+	// (overSpaces true) or is not a space (overSpaces false).
+	pos := colon + 1
+	advance := func(overSpaces bool) {
+		for pos < len(line) && (line[pos] == ' ') == overSpaces {
+			pos++
+		}
+	}
+
+	advance(true) // spaces after the colon
+	advance(false)
+	advance(true) // instruction class, spaces
+	advance(false)
+	advance(true) // instruction set, spaces
+
+	hexStart := pos
+	advance(false)
+	hexEnd := pos
+	advance(true)
+
+	// The text runs to the newline, or to the end of the line if there is none.
+	textStart := pos
+	textEnd := len(line)
+	if nl := bytes.IndexByte(line[textStart:], '\n'); nl >= 0 {
+		textEnd = textStart + nl
+	}
+
+	enc, ok := parseHex(line[hexStart:hexEnd], encstart)
+	if !ok {
+		log.Fatalf("cannot parse disassembly: %q", line)
+	}
+
+	return addr, enc, string(fixSpace(line[textStart:textEnd]))
+}
--- a/src/cmd/objdump/armasm.go
+++ b/src/cmd/objdump/armasm.go
--- a/src/cmd/objdump/main.go
+++ b/src/cmd/objdump/main.go
@ -50,6 +50,9 @@ import (
 	"strconv"
 	"strings"
 	"text/tabwriter"
+
+	"cmd/internal/rsc.io/arm/armasm"
+	"cmd/internal/rsc.io/x86/x86asm"
 )

 var symregexp = flag.String("s", "", "only dump symbols matching this regexp")
@ -199,14 +202,14 @@ func disasm_amd64(code []byte, pc uint64, lookup lookupFunc) (string, int) {
 }

 func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) {
-	inst, err := x86_Decode(code, 64)
+	inst, err := x86asm.Decode(code, 64)
 	var text string
 	size := inst.Len
 	if err != nil || size == 0 || inst.Op == 0 {
 		size = 1
 		text = "?"
 	} else {
-		text = x86_plan9Syntax(inst, pc, lookup)
+		text = x86asm.Plan9Syntax(inst, pc, lookup)
 	}
 	return text, size
 }
@ -232,14 +235,14 @@ func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
 }

+// disasm_arm decodes the instruction at the start of code in ARM mode and
+// returns its Plan 9 syntax text together with its length in bytes.
+// pc and lookup are forwarded to armasm.Plan9Syntax, along with a
+// textReader over code.
+// If decoding fails, or yields a zero length or zero opcode, the result is
+// "?" with a length of 4.
 func disasm_arm(code []byte, pc uint64, lookup lookupFunc) (string, int) {
-	inst, err := arm_Decode(code, arm_ModeARM)
+	inst, err := armasm.Decode(code, armasm.ModeARM)
 	var text string
 	size := inst.Len
 	if err != nil || size == 0 || inst.Op == 0 {
 		size = 4
 		text = "?"
 	} else {
-		text = arm_plan9Syntax(inst, pc, lookup, textReader{code, pc})
+		text = armasm.Plan9Syntax(inst, pc, lookup, textReader{code, pc})
 	}
 	return text, size
 }
--- a/src/cmd/objdump/x86.go
+++ b/src/cmd/objdump/x86.go