// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"fmt"
	"sort"
)
// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
2020-07-27 16:46:35 -04:00
// that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into
// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
// split between registers and memory).
//
// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
2020-07-09 15:47:26 -04:00
func expandCalls ( f * Func ) {
2020-07-27 16:46:35 -04:00
if ! LateCallExpansionEnabledWithin ( f ) {
return
}
2020-07-09 15:47:26 -04:00
canSSAType := f . fe . CanSSA
2020-07-27 16:46:35 -04:00
regSize := f . Config . RegSize
2020-07-09 15:47:26 -04:00
sp , _ := f . spSb ( )
2020-07-27 16:46:35 -04:00
debug := f . pass . debug > 0
// For 32-bit, need to deal with decomposition of 64-bit integers
tUint32 := types . Types [ types . TUINT32 ]
tInt32 := types . Types [ types . TINT32 ]
var hiOffset , lowOffset int64
if f . Config . BigEndian {
lowOffset = 4
} else {
hiOffset = 4
}
2020-08-07 22:46:43 -04:00
2020-07-27 16:46:35 -04:00
pairTypes := func ( et types . EType ) ( tHi , tLo * types . Type ) {
tHi = tUint32
if et == types . TINT64 {
tHi = tInt32
}
tLo = tUint32
return
}
// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
// that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
// Other aggregate types are expanded in decomposeBuiltin, which comes later.
isAlreadyExpandedAggregateType := func ( t * types . Type ) bool {
if ! canSSAType ( t ) {
return false
}
return t . IsStruct ( ) || t . IsArray ( ) || regSize == 4 && t . Size ( ) > 4 && t . IsInteger ( )
}
2020-07-09 15:47:26 -04:00
// Calls that need lowering have some number of inputs, including a memory input,
// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
// With the current ABI those inputs need to be converted into stores to memory,
// rethreading the call's memory input to the first, and the new call now receiving the last.
// With the current ABI, the outputs need to be converted to loads, which will all use the call's
// memory output as their input.
2020-07-27 16:46:35 -04:00
// rewriteSelect recursively walks leaf selector to a root (OpSelectN) through
// a chain of Struct/Array Select operations. If the chain of selectors does not
// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
// It emits the code necessary to implement the leaf select operation that leads to the call.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
var rewriteSelect func ( leaf * Value , selector * Value , offset int64 )
rewriteSelect = func ( leaf * Value , selector * Value , offset int64 ) {
switch selector . Op {
case OpSelectN :
// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
call := selector . Args [ 0 ]
aux := call . Aux . ( * AuxCall )
which := selector . AuxInt
if which == aux . NResults ( ) { // mem is after the results.
// rewrite v as a Copy of call -- the replacement call will produce a mem.
leaf . copyOf ( call )
} else {
leafType := leaf . Type
pt := types . NewPtr ( leafType )
if canSSAType ( leafType ) {
off := f . ConstOffPtrSP ( pt , offset + aux . OffsetOfResult ( which ) , sp )
// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
if leaf . Block == call . Block {
leaf . reset ( OpLoad )
leaf . SetArgs2 ( off , call )
2020-07-09 15:47:26 -04:00
} else {
2020-07-27 16:46:35 -04:00
w := call . Block . NewValue2 ( leaf . Pos , OpLoad , leafType , off , call )
leaf . copyOf ( w )
2020-07-09 15:47:26 -04:00
}
2020-07-27 16:46:35 -04:00
} else {
panic ( "Should not have non-SSA-able OpSelectN" )
2020-07-09 15:47:26 -04:00
}
2020-07-27 16:46:35 -04:00
}
case OpStructSelect :
w := selector . Args [ 0 ]
if w . Type . Etype != types . TSTRUCT {
fmt . Printf ( "Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n" , leaf . LongString ( ) , selector . LongString ( ) , w . LongString ( ) , f . Name )
}
rewriteSelect ( leaf , w , offset + w . Type . FieldOff ( int ( selector . AuxInt ) ) )
2020-07-09 15:47:26 -04:00
2020-07-27 16:46:35 -04:00
case OpInt64Hi :
w := selector . Args [ 0 ]
rewriteSelect ( leaf , w , offset + hiOffset )
case OpInt64Lo :
w := selector . Args [ 0 ]
rewriteSelect ( leaf , w , offset + lowOffset )
case OpArraySelect :
w := selector . Args [ 0 ]
rewriteSelect ( leaf , w , offset + selector . Type . Size ( ) * selector . AuxInt )
default :
// Ignore dead ends; on 32-bit, these can occur running before decompose builtins.
}
}
// storeArg converts stores of SSA-able aggregates into a series of stores of smaller types into
// individual parameter slots.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
var storeArg func ( pos src . XPos , b * Block , a * Value , t * types . Type , offset int64 , mem * Value ) * Value
storeArg = func ( pos src . XPos , b * Block , a * Value , t * types . Type , offset int64 , mem * Value ) * Value {
switch a . Op {
case OpArrayMake0 , OpStructMake0 :
return mem
case OpStructMake1 , OpStructMake2 , OpStructMake3 , OpStructMake4 :
for i := 0 ; i < t . NumFields ( ) ; i ++ {
fld := t . Field ( i )
mem = storeArg ( pos , b , a . Args [ i ] , fld . Type , offset + fld . Offset , mem )
}
return mem
case OpArrayMake1 :
return storeArg ( pos , b , a . Args [ 0 ] , t . Elem ( ) , offset , mem )
case OpInt64Make :
tHi , tLo := pairTypes ( t . Etype )
mem = storeArg ( pos , b , a . Args [ 0 ] , tHi , offset + hiOffset , mem )
return storeArg ( pos , b , a . Args [ 1 ] , tLo , offset + lowOffset , mem )
}
dst := f . ConstOffPtrSP ( types . NewPtr ( t ) , offset , sp )
x := b . NewValue3A ( pos , OpStore , types . TypeMem , t , dst , a , mem )
if debug {
fmt . Printf ( "storeArg(%v) returns %s\n" , a , x . LongString ( ) )
}
return x
}
// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
// TODO should also optimize offsets from SB?
offsetFrom := func ( dst * Value , offset int64 , t * types . Type ) * Value {
pt := types . NewPtr ( t )
if offset == 0 && dst . Type == pt { // this is not actually likely
return dst
}
if dst . Op != OpOffPtr {
return dst . Block . NewValue1I ( dst . Pos . WithNotStmt ( ) , OpOffPtr , pt , offset , dst )
}
// Simplify OpOffPtr
from := dst . Args [ 0 ]
offset += dst . AuxInt
if from == sp {
return f . ConstOffPtrSP ( pt , offset , sp )
}
return dst . Block . NewValue1I ( dst . Pos . WithNotStmt ( ) , OpOffPtr , pt , offset , from )
}
// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
var splitStore func ( dst , src , mem , v * Value , t * types . Type , offset int64 , firstStorePos src . XPos ) * Value
splitStore = func ( dst , src , mem , v * Value , t * types . Type , offset int64 , firstStorePos src . XPos ) * Value {
// TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
pos := v . Pos . WithNotStmt ( )
switch t . Etype {
case types . TINT64 , types . TUINT64 :
if t . Width == regSize {
break
2020-07-09 15:47:26 -04:00
}
2020-07-27 16:46:35 -04:00
tHi , tLo := pairTypes ( t . Etype )
sel := src . Block . NewValue1 ( pos , OpInt64Hi , tHi , src )
mem = splitStore ( dst , sel , mem , v , tHi , offset + hiOffset , firstStorePos )
firstStorePos = firstStorePos . WithNotStmt ( )
sel = src . Block . NewValue1 ( pos , OpInt64Lo , tLo , src )
return splitStore ( dst , sel , mem , v , tLo , offset + lowOffset , firstStorePos )
case types . TARRAY :
elt := t . Elem ( )
for i := int64 ( 0 ) ; i < t . NumElem ( ) ; i ++ {
sel := src . Block . NewValue1I ( pos , OpArraySelect , elt , i , src )
mem = splitStore ( dst , sel , mem , v , elt , offset + i * elt . Width , firstStorePos )
firstStorePos = firstStorePos . WithNotStmt ( )
}
return mem
case types . TSTRUCT :
if src . Op == OpIData && t . NumFields ( ) == 1 && t . Field ( 0 ) . Type . Width == t . Width && t . Width == regSize {
// This peculiar test deals with accesses to immediate interface data.
// It works okay because everything is the same size.
// Example code that triggers this can be found in go/constant/value.go, function ToComplex
// v119 (+881) = IData <intVal> v6
// v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
// Guard against "struct{struct{*foo}}"
for t . Etype == types . TSTRUCT && t . NumFields ( ) == 1 {
t = t . Field ( 0 ) . Type
}
if t . Etype == types . TSTRUCT || t . Etype == types . TARRAY {
f . Fatalf ( "Did not expect to find IDATA-immediate with non-trivial struct in it" )
}
break // handle the leaf type.
}
for i := 0 ; i < t . NumFields ( ) ; i ++ {
fld := t . Field ( i )
sel := src . Block . NewValue1I ( pos , OpStructSelect , fld . Type , int64 ( i ) , src )
mem = splitStore ( dst , sel , mem , v , fld . Type , offset + fld . Offset , firstStorePos )
firstStorePos = firstStorePos . WithNotStmt ( )
}
return mem
2020-07-09 15:47:26 -04:00
}
2020-07-27 16:46:35 -04:00
// Default, including for aggregates whose single element exactly fills their container
// TODO this will be a problem for cast interfaces containing floats when we move to registers.
x := v . Block . NewValue3A ( firstStorePos , OpStore , types . TypeMem , t , offsetFrom ( dst , offset , t ) , src , mem )
if debug {
fmt . Printf ( "splitStore(%v, %v, %v, %v) returns %s\n" , dst , src , mem , v , x . LongString ( ) )
}
return x
2020-07-09 15:47:26 -04:00
}
2020-08-07 22:46:43 -04:00
rewriteArgs := func ( v * Value , firstArg int ) * Value {
// Thread the stores on the memory arg
aux := v . Aux . ( * AuxCall )
pos := v . Pos . WithNotStmt ( )
m0 := v . Args [ len ( v . Args ) - 1 ]
mem := m0
for i , a := range v . Args {
if i < firstArg {
continue
}
if a == m0 { // mem is last.
break
}
auxI := int64 ( i - firstArg )
if a . Op == OpDereference {
if a . MemoryArg ( ) != m0 {
f . Fatalf ( "Op...LECall and OpDereference have mismatched mem, %s and %s" , v . LongString ( ) , a . LongString ( ) )
}
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
// TODO this will be more complicated with registers in the picture.
src := a . Args [ 0 ]
dst := f . ConstOffPtrSP ( src . Type , aux . OffsetOfArg ( auxI ) , sp )
if a . Uses == 1 {
a . reset ( OpMove )
a . Pos = pos
a . Type = types . TypeMem
a . Aux = aux . TypeOfArg ( auxI )
a . AuxInt = aux . SizeOfArg ( auxI )
a . SetArgs3 ( dst , src , mem )
mem = a
} else {
mem = a . Block . NewValue3A ( pos , OpMove , types . TypeMem , aux . TypeOfArg ( auxI ) , dst , src , mem )
mem . AuxInt = aux . SizeOfArg ( auxI )
}
} else {
mem = storeArg ( pos , v . Block , a , aux . TypeOfArg ( auxI ) , aux . OffsetOfArg ( auxI ) , mem )
}
}
v . resetArgs ( )
return mem
}
2020-07-27 16:46:35 -04:00
// Step 0: rewrite the calls to convert incoming args to stores.
2020-07-09 15:47:26 -04:00
for _ , b := range f . Blocks {
for _ , v := range b . Values {
switch v . Op {
case OpStaticLECall :
2020-08-07 22:46:43 -04:00
mem := rewriteArgs ( v , 0 )
2020-07-09 15:47:26 -04:00
v . SetArgs1 ( mem )
2020-08-07 22:46:43 -04:00
case OpClosureLECall :
code := v . Args [ 0 ]
context := v . Args [ 1 ]
mem := rewriteArgs ( v , 2 )
v . SetArgs3 ( code , context , mem )
case OpInterLECall :
code := v . Args [ 0 ]
mem := rewriteArgs ( v , 1 )
v . SetArgs2 ( code , mem )
2020-07-09 15:47:26 -04:00
}
}
}
2020-07-27 16:46:35 -04:00
// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
for _ , b := range f . Blocks {
for _ , v := range b . Values {
if v . Op == OpStore {
t := v . Aux . ( * types . Type )
if isAlreadyExpandedAggregateType ( t ) {
dst , src , mem := v . Args [ 0 ] , v . Args [ 1 ] , v . Args [ 2 ]
mem = splitStore ( dst , src , mem , v , t , 0 , v . Pos )
v . copyOf ( mem )
}
}
}
}
val2Preds := make ( map [ * Value ] int32 ) // Used to accumulate dependency graph of selection operations for topological ordering.
// Step 2: accumulate selection operations for rewrite in topological order.
// Any select-for-addressing applied to call results can be transformed directly.
// TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
for _ , b := range f . Blocks {
for _ , v := range b . Values {
// Accumulate chains of selectors for processing in topological order
switch v . Op {
case OpStructSelect , OpArraySelect , OpInt64Hi , OpInt64Lo :
w := v . Args [ 0 ]
switch w . Op {
case OpStructSelect , OpArraySelect , OpInt64Hi , OpInt64Lo , OpSelectN :
val2Preds [ w ] += 1
if debug {
fmt . Printf ( "v2p[%s] = %d\n" , w . LongString ( ) , val2Preds [ w ] )
}
}
fallthrough
case OpSelectN :
if _ , ok := val2Preds [ v ] ; ! ok {
val2Preds [ v ] = 0
if debug {
fmt . Printf ( "v2p[%s] = %d\n" , v . LongString ( ) , val2Preds [ v ] )
}
}
case OpSelectNAddr :
// Do these directly, there are no chains of selectors.
call := v . Args [ 0 ]
which := v . AuxInt
aux := call . Aux . ( * AuxCall )
pt := v . Type
off := f . ConstOffPtrSP ( pt , aux . OffsetOfResult ( which ) , sp )
v . copyOf ( off )
}
}
}
// Compilation must be deterministic
var ordered [ ] * Value
less := func ( i , j int ) bool { return ordered [ i ] . ID < ordered [ j ] . ID }
// Step 3: Rewrite in topological order. All chains of selectors end up in same block as the call.
for len ( val2Preds ) > 0 {
ordered = ordered [ : 0 ]
for v , n := range val2Preds {
if n == 0 {
ordered = append ( ordered , v )
}
}
sort . Slice ( ordered , less )
for _ , v := range ordered {
for {
w := v . Args [ 0 ]
if debug {
fmt . Printf ( "About to rewrite %s, args[0]=%s\n" , v . LongString ( ) , w . LongString ( ) )
}
delete ( val2Preds , v )
rewriteSelect ( v , v , 0 )
v = w
n , ok := val2Preds [ v ]
if ! ok {
break
}
if n != 1 {
val2Preds [ v ] = n - 1
break
}
// Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
}
}
}
// Step 4: rewrite the calls themselves, correcting the type
for _ , b := range f . Blocks {
for _ , v := range b . Values {
switch v . Op {
case OpStaticLECall :
v . Op = OpStaticCall
v . Type = types . TypeMem
2020-08-07 22:46:43 -04:00
case OpClosureLECall :
v . Op = OpClosureCall
v . Type = types . TypeMem
case OpInterLECall :
v . Op = OpInterCall
v . Type = types . TypeMem
2020-07-27 16:46:35 -04:00
}
}
}
// Step 5: elide any copies introduced.
for _ , b := range f . Blocks {
for _ , v := range b . Values {
for i , a := range v . Args {
if a . Op != OpCopy {
continue
}
aa := copySource ( a )
v . SetArg ( i , aa )
for a . Uses == 0 {
b := a . Args [ 0 ]
a . reset ( OpInvalid )
a = b
}
}
}
}
2020-07-09 15:47:26 -04:00
}