Mirror of https://github.com/golang/go.git (synced 2025-11-04 02:30:57 +00:00)

	cmd/compile: use generated loops instead of DUFFCOPY on riscv64
name                     old B/s         new B/s         delta
MemmoveKnownSize112-4    632.1Mi ± 1%    1288.5Mi ± 0%   +103.85% (p=0.000 n=10)
MemmoveKnownSize128-4    636.1Mi ± 0%    1280.9Mi ± 1%   +101.36% (p=0.000 n=10)
MemmoveKnownSize192-4    645.3Mi ± 0%    1306.9Mi ± 1%   +102.53% (p=0.000 n=10)
MemmoveKnownSize248-4    650.2Mi ± 2%    1312.5Mi ± 1%   +101.87% (p=0.000 n=10)
MemmoveKnownSize256-4    650.7Mi ± 0%    1303.6Mi ± 1%   +100.33% (p=0.000 n=10)
MemmoveKnownSize512-4    658.2Mi ± 1%    1293.9Mi ± 0%    +96.60% (p=0.000 n=10)
MemmoveKnownSize1024-4   662.1Mi ± 0%    1312.6Mi ± 0%    +98.26% (p=0.000 n=10)

Change-Id: I43681ca029880025558b33ddc4295da3947c9b28
Reviewed-on: https://go-review.googlesource.com/c/go/+/700537
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Mark Freeman <markfreeman@google.com>
This commit is contained in:

    parent  879ff736d3
    commit  4dac9e093f

5 changed files with 186 additions and 222 deletions
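Before the diff itself, here is a minimal Go model of the two strategies this change introduces: fully unrolled load/store pairs for small copies (LoweredMove) and an 8-move chunk loop for large ones (LoweredMoveLoop). It is an illustration written against the hunks below, not the compiler's output, and it assumes 8-byte-aligned operands.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// moveUnrolled models LoweredMove: widest-first load/store pairs,
// then a descending power-of-two tail (the fracMovOps idea).
func moveUnrolled(dst, src []byte) {
	n := len(src)
	off := 0
	for n >= 8 {
		binary.LittleEndian.PutUint64(dst[off:], binary.LittleEndian.Uint64(src[off:]))
		off += 8
		n -= 8
	}
	for _, w := range []int{4, 2, 1} {
		if n >= w {
			copy(dst[off:off+w], src[off:off+w])
			off += w
			n -= w
		}
	}
}

// moveLoop models LoweredMoveLoop: copy a "chunk" of eight moves'
// worth per iteration, then finish the remainder without a loop.
func moveLoop(dst, src []byte) {
	const chunk = 8 * 8 // eight moves of the 8-byte move width
	n := len(src)
	off := 0
	for off < n-n%chunk {
		moveUnrolled(dst[off:off+chunk], src[off:off+chunk])
		off += chunk
	}
	moveUnrolled(dst[off:], src[off:])
}

func main() {
	src := make([]byte, 200)
	for i := range src {
		src[i] = byte(i)
	}
	dst := make([]byte, len(src))
	moveLoop(dst, src)
	fmt.Println(bytes.Equal(dst, src)) // true
}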
cmd/compile/internal/riscv64/ssa.go

@@ -822,44 +822,99 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		}
 	case ssa.OpRISCV64LoweredMove:
-		mov, sz := largestMove(v.AuxInt)
-
-		//	mov	(Rarg1), T2
-		//	mov	T2, (Rarg0)
-		//	ADD	$sz, Rarg0
-		//	ADD	$sz, Rarg1
-		//	BGEU	Rarg2, Rarg0, -4(PC)
-		p := s.Prog(mov)
-		p.From.Type = obj.TYPE_MEM
-		p.From.Reg = v.Args[1].Reg()
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = riscv.REG_T2
-
-		p2 := s.Prog(mov)
-		p2.From.Type = obj.TYPE_REG
-		p2.From.Reg = riscv.REG_T2
-		p2.To.Type = obj.TYPE_MEM
-		p2.To.Reg = v.Args[0].Reg()
-
-		p3 := s.Prog(riscv.AADD)
-		p3.From.Type = obj.TYPE_CONST
-		p3.From.Offset = sz
-		p3.To.Type = obj.TYPE_REG
-		p3.To.Reg = v.Args[0].Reg()
-
-		p4 := s.Prog(riscv.AADD)
-		p4.From.Type = obj.TYPE_CONST
-		p4.From.Offset = sz
-		p4.To.Type = obj.TYPE_REG
-		p4.To.Reg = v.Args[1].Reg()
-
-		p5 := s.Prog(riscv.ABGEU)
-		p5.To.Type = obj.TYPE_BRANCH
-		p5.Reg = v.Args[1].Reg()
-		p5.From.Type = obj.TYPE_REG
-		p5.From.Reg = v.Args[2].Reg()
-		p5.To.SetTarget(p)
+		dst := v.Args[0].Reg()
+		src := v.Args[1].Reg()
+		if dst == src {
+			break
+		}
+
+		sa := v.AuxValAndOff()
+		n := sa.Val64()
+		mov, sz := largestMove(sa.Off64())
+
+		var off int64
+		tmp := int16(riscv.REG_X5)
+		for n >= sz {
+			moveOp(s, mov, dst, src, tmp, off)
+			off += sz
+			n -= sz
+		}
+
+		for i := len(fracMovOps) - 1; i >= 0; i-- {
+			tsz := int64(1 << i)
+			if n < tsz {
+				continue
+			}
+			moveOp(s, fracMovOps[i], dst, src, tmp, off)
+			off += tsz
+			n -= tsz
+		}
+
+	case ssa.OpRISCV64LoweredMoveLoop:
+		dst := v.Args[0].Reg()
+		src := v.Args[1].Reg()
+		if dst == src {
+			break
+		}
+
+		sc := v.AuxValAndOff()
+		n := sc.Val64()
+		mov, sz := largestMove(sc.Off64())
+		chunk := 8 * sz
+
+		if n <= 3*chunk {
+			v.Fatalf("MoveLoop too small:%d, expect:%d", n, 3*chunk)
+		}
+		tmp := int16(riscv.REG_X5)
+
+		p := s.Prog(riscv.AADD)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = n - n%chunk
+		p.Reg = src
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = riscv.REG_X6
+
+		for i := int64(0); i < 8; i++ {
+			moveOp(s, mov, dst, src, tmp, sz*i)
+		}
+
+		p1 := s.Prog(riscv.AADD)
+		p1.From.Type = obj.TYPE_CONST
+		p1.From.Offset = chunk
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = src
+
+		p2 := s.Prog(riscv.AADD)
+		p2.From.Type = obj.TYPE_CONST
+		p2.From.Offset = chunk
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = dst
+
+		p3 := s.Prog(riscv.ABNE)
+		p3.From.Reg = riscv.REG_X6
+		p3.From.Type = obj.TYPE_REG
+		p3.Reg = src
+		p3.To.Type = obj.TYPE_BRANCH
+		p3.To.SetTarget(p.Link)
+
+		n %= chunk
+
+		var off int64
+		for n >= sz {
+			moveOp(s, mov, dst, src, tmp, off)
+			off += sz
+			n -= sz
+		}
+
+		for i := len(fracMovOps) - 1; i >= 0; i-- {
+			tsz := int64(1 << i)
+			if n < tsz {
+				continue
+			}
+			moveOp(s, fracMovOps[i], dst, src, tmp, off)
+			off += tsz
+			n -= tsz
+		}
+
 	case ssa.OpRISCV64LoweredNilCheck:
 		// Issue a load which will fault if arg is nil.
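The case above splits an n-byte move into as many widest-width accesses as fit, then finishes the tail with descending power-of-two accesses. largestMove and fracMovOps are existing helpers in this file that the hunk relies on but does not show; the sketch below assumes largestMove returns the widest access width the alignment permits.

package main

import "fmt"

// movePlan sketches the split performed above: repeat the widest
// access the alignment allows (largestMove), then a descending
// power-of-two tail (the fracMovOps loop).
func movePlan(n, alignment int64) (widths []int64) {
	sz := int64(1)
	for _, w := range []int64{8, 4, 2} {
		if alignment%w == 0 {
			sz = w
			break
		}
	}
	for n >= sz {
		widths = append(widths, sz)
		n -= sz
	}
	for w := int64(4); w >= 1; w /= 2 { // the fracMovOps tail
		if n >= w {
			widths = append(widths, w)
			n -= w
		}
	}
	return widths
}

func main() {
	fmt.Println(movePlan(30, 8)) // [8 8 8 4 2]
	fmt.Println(movePlan(30, 2)) // fifteen 2-byte moves
}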
@@ -1023,3 +1078,21 @@ func zeroOp(s *ssagen.State, mov obj.As, reg int16, off int64) {
 	p.To.Offset = off
 	return
 }
+
+func moveOp(s *ssagen.State, mov obj.As, dst int16, src int16, tmp int16, off int64) {
+	p := s.Prog(mov)
+	p.From.Type = obj.TYPE_MEM
+	p.From.Reg = src
+	p.From.Offset = off
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg = tmp
+
+	p1 := s.Prog(mov)
+	p1.From.Type = obj.TYPE_REG
+	p1.From.Reg = tmp
+	p1.To.Type = obj.TYPE_MEM
+	p1.To.Reg = dst
+	p1.To.Offset = off
+
+	return
+}
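moveOp, added above, is the building block both cases share: a load from src+off into a scratch register, then a store from the scratch register to dst+off. A byte-slice model of that instruction pair, with a hypothetical moveWord helper and tmp standing in for X5:

package main

import "fmt"

// A model of the pair moveOp emits for an 8-byte mov:
//	MOV	off(src), tmp
//	MOV	tmp, off(dst)
func moveWord(dst, src []byte, off int) {
	var tmp [8]byte
	copy(tmp[:], src[off:off+8]) // load into the scratch register
	copy(dst[off:off+8], tmp[:]) // store from the scratch register
}

func main() {
	src := []byte("go riscv64 copy!")
	dst := make([]byte, len(src))
	moveWord(dst, src, 0)
	moveWord(dst, src, 8)
	fmt.Println(string(dst)) // go riscv64 copy!
}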
cmd/compile/internal/ssa/_gen/RISCV64.rules

@@ -442,37 +442,16 @@
 	(MOVHstore [4] dst (MOVHload [4] src mem)
 		(MOVHstore [2] dst (MOVHload [2] src mem)
 			(MOVHstore dst (MOVHload src mem) mem)))
-(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
-	(MOVWstore [8] dst (MOVWload [8] src mem)
-		(MOVWstore [4] dst (MOVWload [4] src mem)
-			(MOVWstore dst (MOVWload src mem) mem)))
-(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
-	(MOVDstore [8] dst (MOVDload [8] src mem)
-		(MOVDstore dst (MOVDload src mem) mem))
-(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
-	(MOVDstore [16] dst (MOVDload [16] src mem)
-		(MOVDstore [8] dst (MOVDload [8] src mem)
-			(MOVDstore dst (MOVDload src mem) mem)))
-(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 =>
-	(MOVDstore [24] dst (MOVDload [24] src mem)
-		(MOVDstore [16] dst (MOVDload [16] src mem)
-			(MOVDstore [8] dst (MOVDload [8] src mem)
-				(MOVDstore dst (MOVDload src mem) mem))))
 
-// Medium 8-aligned move uses a Duff's device
-// 16 and 128 are magic constants, see runtime/mkduff.go
-(Move [s] {t} dst src mem)
-	&& s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
-	&& logLargeCopy(v, s) =>
-	(DUFFCOPY [16 * (128 - s/8)] dst src mem)
-
-// Generic move uses a loop
-(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
-	(LoweredMove [t.Alignment()]
-		dst
-		src
-		(ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src)
-		mem)
+// Generic move
+(Move [s] {t} dst src mem) && s > 0 && s <= 3*8*moveSize(t.Alignment(), config)
+	&& logLargeCopy(v, s) =>
+	(LoweredMove [makeValAndOff(int32(s),int32(t.Alignment()))] dst src mem)
+(Move [s] {t} dst src mem) && s > 3*8*moveSize(t.Alignment(), config)
+	&& logLargeCopy(v, s) =>
+	(LoweredMoveLoop [makeValAndOff(int32(s),int32(t.Alignment()))] dst src mem)
 
 // Boolean ops; 0=false, 1=true
 (AndB ...) => (AND ...)
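Both new rules hinge on the 3*8*moveSize(t.Alignment(), config) boundary: at most three chunks' worth of data is unrolled inline (LoweredMove), and anything larger becomes a loop (LoweredMoveLoop). A sketch of where that cutover lands; moveSizeWidth is a stand-in for the real moveSize helper, assuming it returns the widest store width the alignment permits on a 64-bit target.

package main

import "fmt"

// moveSizeWidth is a stand-in for the generic moveSize helper the
// rules call (assumed behavior: widest access the alignment allows).
func moveSizeWidth(align int64) int64 {
	switch {
	case align%8 == 0:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

func main() {
	for _, align := range []int64{1, 2, 4, 8} {
		cut := 3 * 8 * moveSizeWidth(align)
		fmt.Printf("alignment %d: LoweredMove for s <= %d, LoweredMoveLoop above\n", align, cut)
	}
}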
cmd/compile/internal/ssa/_gen/RISCV64Ops.go

@@ -117,6 +117,7 @@ func init() {
 	regCtxt := regNamed["X26"]
 	callerSave := gpMask | fpMask | regNamed["g"]
+	r5toR6 := regNamed["X5"] | regNamed["X6"]
 
 	var (
 		gpstore  = regInfo{inputs: []regMask{gpspsbMask, gpspMask, 0}} // SB in first input so we can load from a global, but not in second to avoid using SB as a temporary register
@@ -354,27 +355,51 @@ func init() {
 		},
 
 		// general unaligned move
-		// arg0 = address of dst memory (in X5, changed as side effect)
-		// arg1 = address of src memory (in X6, changed as side effect)
-		// arg2 = address of the last element of src (can't be X7 as we clobber it before using arg2)
-		// arg3 = mem
-		// auxint = alignment
-		// clobbers X7 as a tmp register.
-		//	mov	(X6), X7
-		//	mov	X7, (X5)
-		//	ADD	$sz, X5
-		//	ADD	$sz, X6
-		//	BGEU	Rarg2, X5, -4(PC)
+		// arg0 = address of dst memory (clobber)
+		// arg1 = address of src memory (clobber)
+		// arg2 = mem
+		// auxint = size and type alignment
+		// returns mem
+		//	mov	(offset)(Rarg1), TMP
+		//	mov	TMP, (offset)(Rarg0)
 		{
 			name:      "LoweredMove",
-			aux:       "Int64",
-			argLength: 4,
+			aux:       "SymValAndOff",
+			symEffect: "Write",
+			argLength: 3,
 			reg: regInfo{
-				inputs:   []regMask{regNamed["X5"], regNamed["X6"], gpMask &^ regNamed["X7"]},
-				clobbers: regNamed["X5"] | regNamed["X6"] | regNamed["X7"],
+				inputs:   []regMask{gpMask &^ regNamed["X5"], gpMask &^ regNamed["X5"]},
+				clobbers: regNamed["X5"],
 			},
 			faultOnNilArg0: true,
 			faultOnNilArg1: true,
 		},
+
+		// general unaligned move
+		// arg0 = address of dst memory (clobber)
+		// arg1 = address of src memory (clobber)
+		// arg2 = mem
+		// auxint = size and type alignment
+		// returns mem
+		//	ADD	$sz, X6
+		//loop:
+		//	mov	(Rarg1), X5
+		//	mov	X5, (Rarg0)
+		//	...rest 7 mov...
+		//	ADD	$sz, Rarg0
+		//	ADD	$sz, Rarg1
+		//	BNE	X6, Rarg1, loop
+		{
+			name:      "LoweredMoveLoop",
+			aux:       "SymValAndOff",
+			argLength: 3,
+			symEffect: "Write",
+			reg: regInfo{
+				inputs:       []regMask{gpMask &^ r5toR6, gpMask &^ r5toR6},
+				clobbers:     r5toR6,
+				clobbersArg0: true,
+				clobbersArg1: true,
+			},
+			typ:            "Mem",
+			faultOnNilArg0: true,
+			faultOnNilArg1: true,
+		},
cmd/compile/internal/ssa/opGen.go

@@ -2571,6 +2571,7 @@ const (
 	OpRISCV64LoweredZero
 	OpRISCV64LoweredZeroLoop
 	OpRISCV64LoweredMove
+	OpRISCV64LoweredMoveLoop
 	OpRISCV64LoweredAtomicLoad8
 	OpRISCV64LoweredAtomicLoad32
 	OpRISCV64LoweredAtomicLoad64
@@ -34585,17 +34586,34 @@ var opcodeTable = [...]opInfo{
 	},
 	{
 		name:           "LoweredMove",
-		auxType:        auxInt64,
-		argLen:         4,
+		auxType:        auxSymValAndOff,
+		argLen:         3,
 		faultOnNilArg0: true,
 		faultOnNilArg1: true,
+		symEffect:      SymWrite,
 		reg: regInfo{
 			inputs: []inputInfo{
-				{0, 16},         // X5
-				{1, 32},         // X6
-				{2, 1006632880}, // X5 X6 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+				{0, 1006632928}, // X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+				{1, 1006632928}, // X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
 			},
-			clobbers: 112, // X5 X6 X7
+			clobbers: 16, // X5
 		},
 	},
+	{
+		name:           "LoweredMoveLoop",
+		auxType:        auxSymValAndOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		faultOnNilArg1: true,
+		symEffect:      SymWrite,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1006632896}, // X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+				{1, 1006632896}, // X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+			},
+			clobbers:     48, // X5 X6
+			clobbersArg0: true,
+			clobbersArg1: true,
+		},
+	},
 	{
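The integer masks in this generated table encode register sets one bit per register; reading the comments above, bit 4 is X5, bit 5 is X6, and so on. A small decoder for the masks in these two entries, with the bit-to-name mapping inferred from the table's own comments rather than taken from compiler source:

package main

import "fmt"

// regNames decodes an integer register mask from opGen.go using the
// mapping implied by the comments above (bit i -> X(i+1), so 16 is X5
// and 48 is X5|X6).
func regNames(mask uint64) []string {
	var names []string
	for i := 0; i < 31; i++ {
		if mask&(1<<i) != 0 {
			names = append(names, fmt.Sprintf("X%d", i+1))
		}
	}
	return names
}

func main() {
	fmt.Println(regNames(16))         // [X5]: LoweredMove's clobbers
	fmt.Println(regNames(48))         // [X5 X6]: LoweredMoveLoop's clobbers
	fmt.Println(regNames(1006632896)) // X7..X26, X28..X30: allowed inputs
}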
cmd/compile/internal/ssa/rewriteRISCV64.go

@@ -3090,169 +3090,38 @@ func rewriteValueRISCV64_OpMove(v *Value) bool {
 		v.AddArg3(dst, v0, v1)
 		return true
 	}
-	// match: (Move [12] {t} dst src mem)
-	// cond: t.Alignment()%4 == 0
-	// result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)))
-	for {
-		if auxIntToInt64(v.AuxInt) != 12 {
-			break
-		}
-		t := auxToType(v.Aux)
-		dst := v_0
-		src := v_1
-		mem := v_2
-		if !(t.Alignment()%4 == 0) {
-			break
-		}
-		v.reset(OpRISCV64MOVWstore)
-		v.AuxInt = int32ToAuxInt(8)
-		v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(8)
-		v0.AddArg2(src, mem)
-		v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
-		v1.AuxInt = int32ToAuxInt(4)
-		v2 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
-		v2.AuxInt = int32ToAuxInt(4)
-		v2.AddArg2(src, mem)
-		v3 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
-		v4 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
-		v4.AddArg2(src, mem)
-		v3.AddArg3(dst, v4, mem)
-		v1.AddArg3(dst, v2, v3)
-		v.AddArg3(dst, v0, v1)
-		return true
-	}
-	// match: (Move [16] {t} dst src mem)
-	// cond: t.Alignment()%8 == 0
-	// result: (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))
-	for {
-		if auxIntToInt64(v.AuxInt) != 16 {
-			break
-		}
-		t := auxToType(v.Aux)
-		dst := v_0
-		src := v_1
-		mem := v_2
-		if !(t.Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpRISCV64MOVDstore)
-		v.AuxInt = int32ToAuxInt(8)
-		v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v0.AuxInt = int32ToAuxInt(8)
-		v0.AddArg2(src, mem)
-		v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v2.AddArg2(src, mem)
-		v1.AddArg3(dst, v2, mem)
-		v.AddArg3(dst, v0, v1)
-		return true
-	}
-	// match: (Move [24] {t} dst src mem)
-	// cond: t.Alignment()%8 == 0
-	// result: (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)))
-	for {
-		if auxIntToInt64(v.AuxInt) != 24 {
-			break
-		}
-		t := auxToType(v.Aux)
-		dst := v_0
-		src := v_1
-		mem := v_2
-		if !(t.Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpRISCV64MOVDstore)
-		v.AuxInt = int32ToAuxInt(16)
-		v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v0.AuxInt = int32ToAuxInt(16)
-		v0.AddArg2(src, mem)
-		v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v1.AuxInt = int32ToAuxInt(8)
-		v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v2.AuxInt = int32ToAuxInt(8)
-		v2.AddArg2(src, mem)
-		v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v4 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v4.AddArg2(src, mem)
-		v3.AddArg3(dst, v4, mem)
-		v1.AddArg3(dst, v2, v3)
-		v.AddArg3(dst, v0, v1)
-		return true
-	}
-	// match: (Move [32] {t} dst src mem)
-	// cond: t.Alignment()%8 == 0
-	// result: (MOVDstore [24] dst (MOVDload [24] src mem) (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))))
-	for {
-		if auxIntToInt64(v.AuxInt) != 32 {
-			break
-		}
-		t := auxToType(v.Aux)
-		dst := v_0
-		src := v_1
-		mem := v_2
-		if !(t.Alignment()%8 == 0) {
-			break
-		}
-		v.reset(OpRISCV64MOVDstore)
-		v.AuxInt = int32ToAuxInt(24)
-		v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v0.AuxInt = int32ToAuxInt(24)
-		v0.AddArg2(src, mem)
-		v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v1.AuxInt = int32ToAuxInt(16)
-		v2 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v2.AuxInt = int32ToAuxInt(16)
-		v2.AddArg2(src, mem)
-		v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v3.AuxInt = int32ToAuxInt(8)
-		v4 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v4.AuxInt = int32ToAuxInt(8)
-		v4.AddArg2(src, mem)
-		v5 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-		v6 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
-		v6.AddArg2(src, mem)
-		v5.AddArg3(dst, v6, mem)
-		v3.AddArg3(dst, v4, v5)
-		v1.AddArg3(dst, v2, v3)
-		v.AddArg3(dst, v0, v1)
-		return true
-	}
 	// match: (Move [s] {t} dst src mem)
-	// cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && logLargeCopy(v, s)
-	// result: (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+	// cond: s > 0 && s <= 3*8*moveSize(t.Alignment(), config) && logLargeCopy(v, s)
+	// result: (LoweredMove [makeValAndOff(int32(s),int32(t.Alignment()))] dst src mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		t := auxToType(v.Aux)
 		dst := v_0
 		src := v_1
 		mem := v_2
-		if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && logLargeCopy(v, s)) {
+		if !(s > 0 && s <= 3*8*moveSize(t.Alignment(), config) && logLargeCopy(v, s)) {
 			break
 		}
-		v.reset(OpRISCV64DUFFCOPY)
-		v.AuxInt = int64ToAuxInt(16 * (128 - s/8))
+		v.reset(OpRISCV64LoweredMove)
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
 		v.AddArg3(dst, src, mem)
 		return true
 	}
 	// match: (Move [s] {t} dst src mem)
-	// cond: (s <= 16 || logLargeCopy(v, s))
-	// result: (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem)
+	// cond: s > 3*8*moveSize(t.Alignment(), config) && logLargeCopy(v, s)
+	// result: (LoweredMoveLoop [makeValAndOff(int32(s),int32(t.Alignment()))] dst src mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		t := auxToType(v.Aux)
 		dst := v_0
 		src := v_1
 		mem := v_2
-		if !(s <= 16 || logLargeCopy(v, s)) {
+		if !(s > 3*8*moveSize(t.Alignment(), config) && logLargeCopy(v, s)) {
 			break
 		}
-		v.reset(OpRISCV64LoweredMove)
-		v.AuxInt = int64ToAuxInt(t.Alignment())
-		v0 := b.NewValue0(v.Pos, OpRISCV64ADDI, src.Type)
-		v0.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
-		v0.AddArg(src)
-		v.AddArg4(dst, src, v0, mem)
+		v.reset(OpRISCV64LoweredMoveLoop)
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
+		v.AddArg3(dst, src, mem)
 		return true
 	}
 	return false
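Both new ops carry the copy size and the type alignment in a single aux value via makeValAndOff. A sketch of that packing, following the ValAndOff convention in cmd/compile/internal/ssa (value in the high 32 bits, offset in the low 32), reimplemented here for illustration rather than copied from the compiler:

package main

import "fmt"

// valAndOff packs val (the move size) and off (the type alignment)
// into one int64 AuxInt, mirroring how the lowering above recovers
// them with sa.Val64() and largestMove(sa.Off64()).
type valAndOff int64

func makeValAndOff(val, off int32) valAndOff {
	return valAndOff(int64(val)<<32 | int64(uint32(off)))
}

func (x valAndOff) Val64() int64 { return int64(x) >> 32 }
func (x valAndOff) Off64() int64 { return int64(int32(x)) }

func main() {
	a := makeValAndOff(200, 8)        // a 200-byte move of an 8-byte-aligned type
	fmt.Println(a.Val64(), a.Off64()) // 200 8
}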