diff --git a/misc/cgo/test/test.go b/misc/cgo/test/test.go index 8c69ad91ac7..35bc3a14475 100644 --- a/misc/cgo/test/test.go +++ b/misc/cgo/test/test.go @@ -901,6 +901,12 @@ typedef struct S32579 { unsigned char data[1]; } S32579; // issue 38649 // Test that #define'd type aliases work. #define netbsd_gid unsigned int + +// issue 40494 +// Inconsistent handling of tagged enum and union types. +enum Enum40494 { X_40494 }; +union Union40494 { int x; }; +void issue40494(enum Enum40494 e, union Union40494* up) {} */ import "C" @@ -2204,3 +2210,10 @@ var issue38649 C.netbsd_gid = 42 // issue 39877 var issue39877 *C.void = nil + +// issue 40494 +// No runtime test; just make sure it compiles. + +func Issue40494() { + C.issue40494(C.enum_Enum40494(C.X_40494), (*C.union_Union40494)(nil)) +} diff --git a/misc/cgo/testshared/shared_test.go b/misc/cgo/testshared/shared_test.go index f8dabbe7a01..5e0893784b6 100644 --- a/misc/cgo/testshared/shared_test.go +++ b/misc/cgo/testshared/shared_test.go @@ -462,6 +462,7 @@ func TestTrivialExecutable(t *testing.T) { run(t, "trivial executable", "../../bin/trivial") AssertIsLinkedTo(t, "../../bin/trivial", soname) AssertHasRPath(t, "../../bin/trivial", gorootInstallDir) + checkSize(t, "../../bin/trivial", 100000) // it is 19K on linux/amd64, 100K should be enough } // Build a trivial program in PIE mode that links against the shared runtime and check it runs. @@ -470,6 +471,18 @@ func TestTrivialExecutablePIE(t *testing.T) { run(t, "trivial executable", "./trivial.pie") AssertIsLinkedTo(t, "./trivial.pie", soname) AssertHasRPath(t, "./trivial.pie", gorootInstallDir) + checkSize(t, "./trivial.pie", 100000) // it is 19K on linux/amd64, 100K should be enough +} + +// Check that the file size does not exceed a limit. +func checkSize(t *testing.T, f string, limit int64) { + fi, err := os.Stat(f) + if err != nil { + t.Fatalf("stat failed: %v", err) + } + if sz := fi.Size(); sz > limit { + t.Errorf("file too large: got %d, want <= %d", sz, limit) + } } // Build a division test program and check it runs. diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go index 6c221473e08..4064f0ae418 100644 --- a/src/cmd/cgo/out.go +++ b/src/cmd/cgo/out.go @@ -123,7 +123,9 @@ func (p *Package) writeDefs() { // Moreover, empty file name makes compile emit no source debug info at all. var buf bytes.Buffer noSourceConf.Fprint(&buf, fset, def.Go) - if bytes.HasPrefix(buf.Bytes(), []byte("_Ctype_")) { + if bytes.HasPrefix(buf.Bytes(), []byte("_Ctype_")) || + strings.HasPrefix(name, "_Ctype_enum_") || + strings.HasPrefix(name, "_Ctype_union_") { // This typedef is of the form `typedef a b` and should be an alias. fmt.Fprintf(fgo2, "= ") } diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 47cb422ab11..9d8a0920b39 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -874,7 +874,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8, ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8, ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8, - ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8: + ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8, + ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8, + ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8, + ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8, + ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8: p := s.Prog(v.Op.Asm()) r, i := v.Args[1].Reg(), v.Args[2].Reg() diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go index 78c979b7cb0..97a5ab4f031 100644 --- a/src/cmd/compile/internal/ssa/addressingmodes.go +++ b/src/cmd/compile/internal/ssa/addressingmodes.go @@ -321,6 +321,23 @@ var combine = map[[2]Op]Op{ [2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ1}: OpAMD64XORQconstmodifyidx1, [2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ8}: OpAMD64XORQconstmodifyidx8, + [2]Op{OpAMD64ADDSSload, OpAMD64LEAQ1}: OpAMD64ADDSSloadidx1, + [2]Op{OpAMD64ADDSSload, OpAMD64LEAQ4}: OpAMD64ADDSSloadidx4, + [2]Op{OpAMD64ADDSDload, OpAMD64LEAQ1}: OpAMD64ADDSDloadidx1, + [2]Op{OpAMD64ADDSDload, OpAMD64LEAQ8}: OpAMD64ADDSDloadidx8, + [2]Op{OpAMD64SUBSSload, OpAMD64LEAQ1}: OpAMD64SUBSSloadidx1, + [2]Op{OpAMD64SUBSSload, OpAMD64LEAQ4}: OpAMD64SUBSSloadidx4, + [2]Op{OpAMD64SUBSDload, OpAMD64LEAQ1}: OpAMD64SUBSDloadidx1, + [2]Op{OpAMD64SUBSDload, OpAMD64LEAQ8}: OpAMD64SUBSDloadidx8, + [2]Op{OpAMD64MULSSload, OpAMD64LEAQ1}: OpAMD64MULSSloadidx1, + [2]Op{OpAMD64MULSSload, OpAMD64LEAQ4}: OpAMD64MULSSloadidx4, + [2]Op{OpAMD64MULSDload, OpAMD64LEAQ1}: OpAMD64MULSDloadidx1, + [2]Op{OpAMD64MULSDload, OpAMD64LEAQ8}: OpAMD64MULSDloadidx8, + [2]Op{OpAMD64DIVSSload, OpAMD64LEAQ1}: OpAMD64DIVSSloadidx1, + [2]Op{OpAMD64DIVSSload, OpAMD64LEAQ4}: OpAMD64DIVSSloadidx4, + [2]Op{OpAMD64DIVSDload, OpAMD64LEAQ1}: OpAMD64DIVSDloadidx1, + [2]Op{OpAMD64DIVSDload, OpAMD64LEAQ8}: OpAMD64DIVSDloadidx8, + // 386 [2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1, [2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1, diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index cd9cb515c02..a3b29049df6 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -149,14 +149,15 @@ func init() { gpstorexchg = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: []regMask{gp}} cmpxchg = regInfo{inputs: []regMask{gp, ax, gp, 0}, outputs: []regMask{gp, 0}, clobbers: ax} - fp01 = regInfo{inputs: nil, outputs: fponly} - fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} - fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly} - fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly} - fpgp = regInfo{inputs: fponly, outputs: gponly} - gpfp = regInfo{inputs: gponly, outputs: fponly} - fp11 = regInfo{inputs: fponly, outputs: fponly} - fp2flags = regInfo{inputs: []regMask{fp, fp}} + fp01 = regInfo{inputs: nil, outputs: fponly} + fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} + fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly} + fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly} + fp21loadidx = regInfo{inputs: []regMask{fp, gpspsb, gpspsb, 0}, outputs: fponly} + fpgp = regInfo{inputs: fponly, outputs: gponly} + gpfp = regInfo{inputs: gponly, outputs: fponly} + fp11 = regInfo{inputs: fponly, outputs: fponly} + fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly} fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly} @@ -201,6 +202,23 @@ func init() { {name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "ADDSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "ADDSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem + {name: "ADDSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "ADDSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem + {name: "SUBSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "SUBSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem + {name: "SUBSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "SUBSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem + {name: "MULSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "MULSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem + {name: "MULSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "MULSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem + {name: "DIVSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "DIVSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem + {name: "DIVSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+arg2+auxint+aux, arg3 = mem + {name: "DIVSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem + // binary ops {name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d27682e3b38..9efa1bfcc4b 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -572,6 +572,22 @@ const ( OpAMD64MULSDload OpAMD64DIVSSload OpAMD64DIVSDload + OpAMD64ADDSSloadidx1 + OpAMD64ADDSSloadidx4 + OpAMD64ADDSDloadidx1 + OpAMD64ADDSDloadidx8 + OpAMD64SUBSSloadidx1 + OpAMD64SUBSSloadidx4 + OpAMD64SUBSDloadidx1 + OpAMD64SUBSDloadidx8 + OpAMD64MULSSloadidx1 + OpAMD64MULSSloadidx4 + OpAMD64MULSDloadidx1 + OpAMD64MULSDloadidx8 + OpAMD64DIVSSloadidx1 + OpAMD64DIVSSloadidx4 + OpAMD64DIVSDloadidx1 + OpAMD64DIVSDloadidx8 OpAMD64ADDQ OpAMD64ADDL OpAMD64ADDQconst @@ -6576,6 +6592,310 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDSSloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AADDSS, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "ADDSSloadidx4", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AADDSS, + scale: 4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "ADDSDloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AADDSD, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "ADDSDloadidx8", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AADDSD, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "SUBSSloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ASUBSS, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "SUBSSloadidx4", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ASUBSS, + scale: 4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "SUBSDloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ASUBSD, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "SUBSDloadidx8", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ASUBSD, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "MULSSloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AMULSS, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "MULSSloadidx4", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AMULSS, + scale: 4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "MULSDloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AMULSD, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "MULSDloadidx8", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.AMULSD, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "DIVSSloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ADIVSS, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "DIVSSloadidx4", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ADIVSS, + scale: 4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "DIVSDloadidx1", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ADIVSD, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, + { + name: "DIVSDloadidx8", + auxType: auxSymOff, + argLen: 4, + resultInArg0: true, + symEffect: SymRead, + asm: x86.ADIVSD, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + {2, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 + }, + }, + }, { name: "ADDQ", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index a8e43d01147..6c6be39d34b 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -1051,6 +1051,11 @@ func addLocalInductiveFacts(ft *factsTable, b *Block) { // // If all of these conditions are true, then i1 < max and i1 >= min. + // To ensure this is a loop header node. + if len(b.Preds) != 2 { + return + } + for _, i1 := range b.Values { if i1.Op != OpPhi { continue @@ -1093,6 +1098,9 @@ func addLocalInductiveFacts(ft *factsTable, b *Block) { } br = negative } + if br == unknown { + continue + } tr, has := domainRelationTable[control.Op] if !has { diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index ef4c86719e0..5271b8f3484 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -2070,17 +2070,6 @@ func ldshlibsyms(ctxt *Link, shlib string) { l.SetSymElfType(s, elf.ST_TYPE(elfsym.Info)) su.SetSize(int64(elfsym.Size)) if elfsym.Section != elf.SHN_UNDEF { - // If it's not undefined, mark the symbol as reachable - // so as to protect it from dead code elimination, - // even if there aren't any explicit references to it. - // Under the previous sym.Symbol based regime this - // wasn't necessary, but for the loader-based deadcode - // it is definitely needed. - // - // FIXME: have a more general/flexible mechanism for this? - // - l.SetAttrReachable(s, true) - // Set .File for the library that actually defines the symbol. l.SetSymPkg(s, libpath) diff --git a/src/crypto/ed25519/ed25519.go b/src/crypto/ed25519/ed25519.go index 5766970f827..6f59bb5cffb 100644 --- a/src/crypto/ed25519/ed25519.go +++ b/src/crypto/ed25519/ed25519.go @@ -154,7 +154,7 @@ func Sign(privateKey PrivateKey, message []byte) []byte { return signature } -func signGeneric(signature, privateKey, message []byte) { +func sign(signature, privateKey, message []byte) { if l := len(privateKey); l != PrivateKeySize { panic("ed25519: bad private key length: " + strconv.Itoa(l)) } @@ -201,10 +201,6 @@ func signGeneric(signature, privateKey, message []byte) { // Verify reports whether sig is a valid signature of message by publicKey. It // will panic if len(publicKey) is not PublicKeySize. func Verify(publicKey PublicKey, message, sig []byte) bool { - return verify(publicKey, message, sig) -} - -func verifyGeneric(publicKey PublicKey, message, sig []byte) bool { if l := len(publicKey); l != PublicKeySize { panic("ed25519: bad public key length: " + strconv.Itoa(l)) } diff --git a/src/crypto/ed25519/ed25519_noasm.go b/src/crypto/ed25519/ed25519_noasm.go deleted file mode 100644 index caa84f74fbc..00000000000 --- a/src/crypto/ed25519/ed25519_noasm.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !s390x - -package ed25519 - -func sign(signature, privateKey, message []byte) { - signGeneric(signature, privateKey, message) -} - -func verify(publicKey PublicKey, message, sig []byte) bool { - return verifyGeneric(publicKey, message, sig) -} diff --git a/src/crypto/ed25519/ed25519_s390x.go b/src/crypto/ed25519/ed25519_s390x.go deleted file mode 100644 index c8627a06523..00000000000 --- a/src/crypto/ed25519/ed25519_s390x.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package ed25519 - -import ( - "internal/cpu" - "strconv" -) - -//go:noescape -func kdsaSign(message, signature, privateKey []byte) bool - -//go:noescape -func kdsaVerify(message, signature, publicKey []byte) bool - -// sign does a check to see if hardware has Edwards Curve instruction available. -// If it does, use the hardware implementation. Otherwise, use the generic version. -func sign(signature, privateKey, message []byte) { - if cpu.S390X.HasEDDSA { - if l := len(privateKey); l != PrivateKeySize { - panic("ed25519: bad private key length: " + strconv.Itoa(l)) - } - - ret := kdsaSign(message, signature, privateKey[:32]) - if !ret { - panic("ed25519: kdsa sign has a failure") - } - return - } - signGeneric(signature, privateKey, message) -} - -// verify does a check to see if hardware has Edwards Curve instruction available. -// If it does, use the hardware implementation for eddsa verfication. Otherwise, the generic -// version is used -func verify(publicKey PublicKey, message, sig []byte) bool { - if cpu.S390X.HasEDDSA { - if l := len(publicKey); l != PublicKeySize { - panic("ed25519: bad public key length: " + strconv.Itoa(l)) - } - - if len(sig) != SignatureSize || sig[63]&224 != 0 { - return false - } - - return kdsaVerify(message, sig, publicKey) - } - return verifyGeneric(publicKey, message, sig) -} diff --git a/src/crypto/ed25519/ed25519_s390x.s b/src/crypto/ed25519/ed25519_s390x.s deleted file mode 100644 index 1c77b51a780..00000000000 --- a/src/crypto/ed25519/ed25519_s390x.s +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// func kdsaSign(message, signature, privateKey []byte) bool -TEXT ·kdsaSign(SB), $4096-73 - // The kdsa instruction takes function code, - // buffer's location, message's location and message len - // as parameters. Out of those, the function code and buffer's location - // should be placed in R0 and R1 respectively. The message's location - // and message length should be placed in an even-odd register pair. (e.g: R2 and R3) - - // The content of parameter block(buffer) looks like the following: - // Signature R, Signature S and Private Key all take 32 bytes. - // In the signing case, the signatures(R and S) will be generated by - // the signing instruction and get placed in the locations shown in the parameter block. - // 0 +---------------+ - // | Signature(R) | - // 32 +---------------+ - // | Signature(S) | - // 64 +---------------+ - // | Private Key | - // 96 +---------------+ - // | Reserved | - // 112+---------------+ - // | | - // | ... | - // | | - // 4088 +---------------+ - - // The following code section setups the buffer from stack: - // Get the address of the buffer stack variable. - MOVD $buffer-4096(SP), R1 - - // Zero the buffer. - MOVD R1, R2 - MOVD $(4096/256), R0 // number of 256 byte chunks to clear - -clear: - XC $256, (R2), (R2) - MOVD $256(R2), R2 - BRCTG R0, clear - - MOVD $40, R0 // EDDSA-25519 sign has a function code of 40 - LMG message+0(FP), R2, R3 // R2=base R3=len - LMG signature+24(FP), R4, R5 // R4=base R5=len - LMG privateKey+48(FP), R6, R7 // R6=base R7=len - - // Checks the length of signature and private key - CMPBNE R5, $64, panic - CMPBNE R7, $32, panic - - // The instruction uses RFC 8032's private key, which is the first 32 bytes - // of the private key in this package. So we copy that into the buffer. - MVC $32, (R6), 64(R1) - -loop: - WORD $0xB93A0002 // The KDSA instruction - BVS loop // The instruction is exectued by hardware and can be interrupted. This does a retry when that happens. - BNE error - -success: - // The signatures generated are in big-endian form, so we - // need to reverse the bytes of Signature(R) and Signature(S) in the buffers to transform - // them from big-endian to little-endian. - - // Transform Signature(R) from big endian to little endian and copy into the signature - MVCIN $32, 31(R1), (R4) - - // Transform Signature(S) from big endian to little endian and copy into the signature - MVCIN $32, 63(R1), 32(R4) - - MOVB $1, ret+72(FP) - RET - -error: - // return false - MOVB $0, ret+72(FP) - RET - -panic: - UNDEF - -// func kdsaVerify(message, signature, publicKey []byte) bool -TEXT ·kdsaVerify(SB), $4096-73 - // The kdsa instruction takes function code, - // buffer's location, message's location and message len - // as parameters. Out of those, the function code and buffer's location - // should be placed in R0 and R1 respectively. The message's location - // and message length should be placed in an even-odd register pair. (e.g: R2 and R3) - - // The parameter block(buffer) is similar to that of signing, except that - // we use public key for verification, and Signatures(R and S) are provided - // as input parameters to the parameter block. - // 0 +---------------+ - // | Signature(R) | - // 32 +---------------+ - // | Signature(S) | - // 64 +---------------+ - // | Public Key | - // 96 +---------------+ - // | Reserved | - // 112+---------------+ - // | | - // | ... | - // | | - // 4088 +---------------+ - - // The following code section setups the buffer from stack: - // Get the address of the buffer stack variable. - MOVD $buffer-4096(SP), R1 - - // Zero the buffer. - MOVD R1, R2 - MOVD $(4096/256), R0 // number of 256 byte chunks to clear - -clear: - XC $256, (R2), (R2) - MOVD $256(R2), R2 - BRCTG R0, clear - - MOVD $32, R0 // EDDSA-25519 verify has a function code of 32 - LMG message+0(FP), R2, R3 // R2=base R3=len - LMG signature+24(FP), R4, R5 // R4=base R5=len - LMG publicKey+48(FP), R6, R7 // R6=base R7=len - - // Checks the length of public key and signature - CMPBNE R5, $64, panic - CMPBNE R7, $32, panic - -verify: - // The instruction needs Signature(R), Signature(S) and public key - // to be in big-endian form during computation. Therefore, - // we do the transformation (from little endian to big endian) and copy those into the buffer. - - // Transform Signature(R) from little endian to big endian and copy into the buffer - MVCIN $32, 31(R4), (R1) - - // Transform Signature(S) from little endian to big endian and copy into the buffer - MVCIN $32, 63(R4), 32(R1) - - // Transform Public Key from little endian to big endian and copy into the buffer - MVCIN $32, 31(R6), 64(R1) - -verifyLoop: - WORD $0xB93A0002 // KDSA instruction - BVS verifyLoop // Retry upon hardware interrupt - BNE error - -success: - MOVB $1, ret+72(FP) - RET - -error: - MOVB $0, ret+72(FP) - RET - -panic: - UNDEF diff --git a/src/crypto/ed25519/ed25519_test.go b/src/crypto/ed25519/ed25519_test.go index f77d463721c..adb09e409a5 100644 --- a/src/crypto/ed25519/ed25519_test.go +++ b/src/crypto/ed25519/ed25519_test.go @@ -26,14 +26,6 @@ func (zeroReader) Read(buf []byte) (int, error) { return len(buf), nil } -// signGenericWrapper is identical to Sign except that it unconditionally calls signGeneric directly -// rather than going through the sign function that might call assembly code. -func signGenericWrapper(privateKey PrivateKey, msg []byte) []byte { - sig := make([]byte, SignatureSize) - signGeneric(sig, privateKey, msg) - return sig -} - func TestUnmarshalMarshal(t *testing.T) { pub, _, _ := GenerateKey(rand.Reader) @@ -53,33 +45,22 @@ func TestUnmarshalMarshal(t *testing.T) { } func TestSignVerify(t *testing.T) { - t.Run("Generic", func(t *testing.T) { testSignVerify(t, signGenericWrapper, verifyGeneric) }) - t.Run("Native", func(t *testing.T) { testSignVerify(t, Sign, Verify) }) -} - -func testSignVerify(t *testing.T, signImpl func(privateKey PrivateKey, message []byte) []byte, - verifyImpl func(publicKey PublicKey, message, sig []byte) bool) { var zero zeroReader public, private, _ := GenerateKey(zero) message := []byte("test message") - sig := signImpl(private, message) - if !verifyImpl(public, message, sig) { + sig := Sign(private, message) + if !Verify(public, message, sig) { t.Errorf("valid signature rejected") } wrongMessage := []byte("wrong message") - if verifyImpl(public, wrongMessage, sig) { + if Verify(public, wrongMessage, sig) { t.Errorf("signature of different message accepted") } } func TestCryptoSigner(t *testing.T) { - t.Run("Generic", func(t *testing.T) { testCryptoSigner(t, verifyGeneric) }) - t.Run("Native", func(t *testing.T) { testCryptoSigner(t, Verify) }) -} - -func testCryptoSigner(t *testing.T, verifyImpl func(publicKey PublicKey, message, sig []byte) bool) { var zero zeroReader public, private, _ := GenerateKey(zero) @@ -102,7 +83,7 @@ func testCryptoSigner(t *testing.T, verifyImpl func(publicKey PublicKey, message t.Fatalf("error from Sign(): %s", err) } - if !verifyImpl(public, message, signature) { + if !Verify(public, message, signature) { t.Errorf("Verify failed on signature from Sign()") } } @@ -130,12 +111,6 @@ func TestEqual(t *testing.T) { } func TestGolden(t *testing.T) { - t.Run("Generic", func(t *testing.T) { testGolden(t, signGenericWrapper, verifyGeneric) }) - t.Run("Native", func(t *testing.T) { testGolden(t, Sign, Verify) }) -} - -func testGolden(t *testing.T, signImpl func(privateKey PrivateKey, message []byte) []byte, - verifyImpl func(publicKey PublicKey, message, sig []byte) bool) { // sign.input.gz is a selection of test cases from // https://ed25519.cr.yp.to/python/sign.input testDataZ, err := os.Open("testdata/sign.input.gz") @@ -177,12 +152,12 @@ func testGolden(t *testing.T, signImpl func(privateKey PrivateKey, message []byt copy(priv[:], privBytes) copy(priv[32:], pubKey) - sig2 := signImpl(priv[:], msg) + sig2 := Sign(priv[:], msg) if !bytes.Equal(sig, sig2[:]) { t.Errorf("different signature result on line %d: %x vs %x", lineNo, sig, sig2) } - if !verifyImpl(pubKey, msg, sig2) { + if !Verify(pubKey, msg, sig2) { t.Errorf("signature failed to verify on line %d", lineNo) } @@ -206,11 +181,6 @@ func testGolden(t *testing.T, signImpl func(privateKey PrivateKey, message []byt } func TestMalleability(t *testing.T) { - t.Run("Generic", func(t *testing.T) { testMalleability(t, verifyGeneric) }) - t.Run("Native", func(t *testing.T) { testMalleability(t, Verify) }) -} - -func testMalleability(t *testing.T, verifyImpl func(publicKey PublicKey, message, sig []byte) bool) { // https://tools.ietf.org/html/rfc8032#section-5.1.7 adds an additional test // that s be in [0, order). This prevents someone from adding a multiple of // order to s and obtaining a second valid signature for the same message. @@ -229,7 +199,7 @@ func testMalleability(t *testing.T, verifyImpl func(publicKey PublicKey, message 0xb1, 0x08, 0xc3, 0xbd, 0xae, 0x36, 0x9e, 0xf5, 0x49, 0xfa, } - if verifyImpl(publicKey, msg, sig) { + if Verify(publicKey, msg, sig) { t.Fatal("non-canonical signature accepted") } } diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go index 891589c0f27..425ca8dd93f 100644 --- a/src/runtime/lockrank_off.go +++ b/src/runtime/lockrank_off.go @@ -1,3 +1,7 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // +build !goexperiment.staticlockranking package runtime diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go index cf4151ff462..fbc5ff58b72 100644 --- a/src/runtime/lockrank_on.go +++ b/src/runtime/lockrank_on.go @@ -1,3 +1,7 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // +build goexperiment.staticlockranking package runtime diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 60f7f9ff58e..8b3c62c375e 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -233,16 +233,12 @@ type pageAlloc struct { // The address to start an allocation search with. It must never // point to any memory that is not contained in inUse, i.e. - // inUse.contains(searchAddr) must always be true. + // inUse.contains(searchAddr.addr()) must always be true. The one + // exception to this rule is that it may take on the value of + // maxOffAddr to indicate that the heap is exhausted. // - // When added with arenaBaseOffset, we guarantee that - // all valid heap addresses (when also added with - // arenaBaseOffset) below this value are allocated and - // not worth searching. - // - // Note that adding in arenaBaseOffset transforms addresses - // to a new address space with a linear view of the full address - // space on architectures with segmented address spaces. + // We guarantee that all valid heap addresses below this value + // are allocated and not worth searching. searchAddr offAddr // start and end represent the chunk indices @@ -518,6 +514,30 @@ func (s *pageAlloc) allocRange(base, npages uintptr) uintptr { return uintptr(scav) * pageSize } +// findMappedAddr returns the smallest mapped offAddr that is +// >= addr. That is, if addr refers to mapped memory, then it is +// returned. If addr is higher than any mapped region, then +// it returns maxOffAddr. +// +// s.mheapLock must be held. +func (s *pageAlloc) findMappedAddr(addr offAddr) offAddr { + // If we're not in a test, validate first by checking mheap_.arenas. + // This is a fast path which is only safe to use outside of testing. + ai := arenaIndex(addr.addr()) + if s.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil { + vAddr, ok := s.inUse.findAddrGreaterEqual(addr.addr()) + if ok { + return offAddr{vAddr} + } else { + // The candidate search address is greater than any + // known address, which means we definitely have no + // free memory left. + return maxOffAddr + } + } + return addr +} + // find searches for the first (address-ordered) contiguous free region of // npages in size and returns a base address for that region. // @@ -526,6 +546,7 @@ func (s *pageAlloc) allocRange(base, npages uintptr) uintptr { // // find also computes and returns a candidate s.searchAddr, which may or // may not prune more of the address space than s.searchAddr already does. +// This candidate is always a valid s.searchAddr. // // find represents the slow path and the full radix tree search. // @@ -695,7 +716,7 @@ nextLevel: // We found a sufficiently large run of free pages straddling // some boundary, so compute the address and return it. addr := levelIndexToOffAddr(l, i).add(uintptr(base) * pageSize).addr() - return addr, firstFree.base + return addr, s.findMappedAddr(firstFree.base) } if l == 0 { // We're at level zero, so that means we've exhausted our search. @@ -741,7 +762,7 @@ nextLevel: // found an even narrower free window. searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize foundFree(offAddr{searchAddr}, chunkBase(ci+1)-searchAddr) - return addr, firstFree.base + return addr, s.findMappedAddr(firstFree.base) } // alloc allocates npages worth of memory from the page heap, returning the base diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go index 89a4a2502ce..65ba71d459c 100644 --- a/src/runtime/mpagealloc_test.go +++ b/src/runtime/mpagealloc_test.go @@ -612,6 +612,63 @@ func TestPageAllocAlloc(t *testing.T) { baseChunkIdx + chunkIdxBigJump: {{0, PallocChunkPages}}, }, } + + // Test to check for issue #40191. Essentially, the candidate searchAddr + // discovered by find may not point to mapped memory, so we need to handle + // that explicitly. + // + // chunkIdxSmallOffset is an offset intended to be used within chunkIdxBigJump. + // It is far enough within chunkIdxBigJump that the summaries at the beginning + // of an address range the size of chunkIdxBigJump will not be mapped in. + const chunkIdxSmallOffset = 0x503 + tests["DiscontiguousBadSearchAddr"] = test{ + before: map[ChunkIdx][]BitRange{ + // The mechanism for the bug involves three chunks, A, B, and C, which are + // far apart in the address space. In particular, B is chunkIdxBigJump + + // chunkIdxSmalloffset chunks away from B, and C is 2*chunkIdxBigJump chunks + // away from A. A has 1 page free, B has several (NOT at the end of B), and + // C is totally free. + // Note that B's free memory must not be at the end of B because the fast + // path in the page allocator will check if the searchAddr even gives us + // enough space to place the allocation in a chunk before accessing the + // summary. + BaseChunkIdx + chunkIdxBigJump*0: {{0, PallocChunkPages - 1}}, + BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: { + {0, PallocChunkPages - 10}, + {PallocChunkPages - 1, 1}, + }, + BaseChunkIdx + chunkIdxBigJump*2: {}, + }, + scav: map[ChunkIdx][]BitRange{ + BaseChunkIdx + chunkIdxBigJump*0: {}, + BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {}, + BaseChunkIdx + chunkIdxBigJump*2: {}, + }, + hits: []hit{ + // We first allocate into A to set the page allocator's searchAddr to the + // end of that chunk. That is the only purpose A serves. + {1, PageBase(BaseChunkIdx, PallocChunkPages-1), 0}, + // Then, we make a big allocation that doesn't fit into B, and so must be + // fulfilled by C. + // + // On the way to fulfilling the allocation into C, we estimate searchAddr + // using the summary structure, but that will give us a searchAddr of + // B's base address minus chunkIdxSmallOffset chunks. These chunks will + // not be mapped. + {100, PageBase(baseChunkIdx+chunkIdxBigJump*2, 0), 0}, + // Now we try to make a smaller allocation that can be fulfilled by B. + // In an older implementation of the page allocator, this will segfault, + // because this last allocation will first try to access the summary + // for B's base address minus chunkIdxSmallOffset chunks in the fast path, + // and this will not be mapped. + {9, PageBase(baseChunkIdx+chunkIdxBigJump*1+chunkIdxSmallOffset, PallocChunkPages-10), 0}, + }, + after: map[ChunkIdx][]BitRange{ + BaseChunkIdx + chunkIdxBigJump*0: {{0, PallocChunkPages}}, + BaseChunkIdx + chunkIdxBigJump*1 + chunkIdxSmallOffset: {{0, PallocChunkPages}}, + BaseChunkIdx + chunkIdxBigJump*2: {{0, 100}}, + }, + } } for name, v := range tests { v := v diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go index e23d0778eb9..2c0eb2c2ddf 100644 --- a/src/runtime/mranges.go +++ b/src/runtime/mranges.go @@ -188,6 +188,25 @@ func (a *addrRanges) findSucc(addr uintptr) int { return len(a.ranges) } +// findAddrGreaterEqual returns the smallest address represented by a +// that is >= addr. Thus, if the address is represented by a, +// then it returns addr. The second return value indicates whether +// such an address exists for addr in a. That is, if addr is larger than +// any address known to a, the second return value will be false. +func (a *addrRanges) findAddrGreaterEqual(addr uintptr) (uintptr, bool) { + i := a.findSucc(addr) + if i == 0 { + return a.ranges[0].base.addr(), true + } + if a.ranges[i-1].contains(addr) { + return addr, true + } + if i < len(a.ranges) { + return a.ranges[i].base.addr(), true + } + return 0, false +} + // contains returns true if a covers the address addr. func (a *addrRanges) contains(addr uintptr) bool { i := a.findSucc(addr) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 2399f0a1d3d..035822216d2 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -132,7 +132,7 @@ func main() { if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon systemstack(func() { - newm(sysmon, nil) + newm(sysmon, nil, -1) }) } @@ -562,7 +562,7 @@ func schedinit() { stackinit() mallocinit() fastrandinit() // must run before mcommoninit - mcommoninit(_g_.m) + mcommoninit(_g_.m, -1) cpuinit() // must run before alginit alginit() // maps must not be used before this call modulesinit() // provides activeModules @@ -623,7 +623,22 @@ func checkmcount() { } } -func mcommoninit(mp *m) { +// mReserveID returns the next ID to use for a new m. This new m is immediately +// considered 'running' by checkdead. +// +// sched.lock must be held. +func mReserveID() int64 { + if sched.mnext+1 < sched.mnext { + throw("runtime: thread ID overflow") + } + id := sched.mnext + sched.mnext++ + checkmcount() + return id +} + +// Pre-allocated ID may be passed as 'id', or omitted by passing -1. +func mcommoninit(mp *m, id int64) { _g_ := getg() // g0 stack won't make sense for user (and is not necessary unwindable). @@ -632,12 +647,12 @@ func mcommoninit(mp *m) { } lock(&sched.lock) - if sched.mnext+1 < sched.mnext { - throw("runtime: thread ID overflow") + + if id >= 0 { + mp.id = id + } else { + mp.id = mReserveID() } - mp.id = sched.mnext - sched.mnext++ - checkmcount() mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed)) mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed)) @@ -1068,7 +1083,7 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { notewakeup(&mp.park) } else { // Start M to run P. Do not start another M below. - newm(nil, p) + newm(nil, p, -1) } } @@ -1413,12 +1428,13 @@ type cgothreadstart struct { // Allocate a new m unassociated with any thread. // Can use p for allocation context if needed. // fn is recorded as the new m's m.mstartfn. +// id is optional pre-allocated m ID. Omit by passing -1. // // This function is allowed to have write barriers even if the caller // isn't because it borrows _p_. // //go:yeswritebarrierrec -func allocm(_p_ *p, fn func()) *m { +func allocm(_p_ *p, fn func(), id int64) *m { _g_ := getg() acquirem() // disable GC because it can be called from sysmon if _g_.m.p == 0 { @@ -1447,7 +1463,7 @@ func allocm(_p_ *p, fn func()) *m { mp := new(m) mp.mstartfn = fn - mcommoninit(mp) + mcommoninit(mp, id) // In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack. // Windows and Plan 9 will layout sched stack on OS stack. @@ -1586,7 +1602,7 @@ func oneNewExtraM() { // The sched.pc will never be returned to, but setting it to // goexit makes clear to the traceback routines where // the goroutine stack ends. - mp := allocm(nil, nil) + mp := allocm(nil, nil, -1) gp := malg(4096) gp.sched.pc = funcPC(goexit) + sys.PCQuantum gp.sched.sp = gp.stack.hi @@ -1757,9 +1773,11 @@ var newmHandoff struct { // Create a new m. It will start off with a call to fn, or else the scheduler. // fn needs to be static and not a heap allocated closure. // May run with m.p==nil, so write barriers are not allowed. +// +// id is optional pre-allocated m ID. Omit by passing -1. //go:nowritebarrierrec -func newm(fn func(), _p_ *p) { - mp := allocm(_p_, fn) +func newm(fn func(), _p_ *p, id int64) { + mp := allocm(_p_, fn, id) mp.nextp.set(_p_) mp.sigmask = initSigmask if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { @@ -1828,7 +1846,7 @@ func startTemplateThread() { releasem(mp) return } - newm(templateThread, nil) + newm(templateThread, nil, -1) releasem(mp) } @@ -1923,16 +1941,31 @@ func startm(_p_ *p, spinning bool) { } } mp := mget() - unlock(&sched.lock) if mp == nil { + // No M is available, we must drop sched.lock and call newm. + // However, we already own a P to assign to the M. + // + // Once sched.lock is released, another G (e.g., in a syscall), + // could find no idle P while checkdead finds a runnable G but + // no running M's because this new M hasn't started yet, thus + // throwing in an apparent deadlock. + // + // Avoid this situation by pre-allocating the ID for the new M, + // thus marking it as 'running' before we drop sched.lock. This + // new M will eventually run the scheduler to execute any + // queued G's. + id := mReserveID() + unlock(&sched.lock) + var fn func() if spinning { // The caller incremented nmspinning, so set m.spinning in the new M. fn = mspinning } - newm(fn, _p_) + newm(fn, _p_, id) return } + unlock(&sched.lock) if mp.spinning { throw("startm: m is spinning") } @@ -5192,7 +5225,9 @@ func runqputbatch(pp *p, q *gQueue, qsize int) { atomic.StoreRel(&pp.runqtail, t) if !q.empty() { + lock(&sched.lock) globrunqputbatch(q, int32(qsize)) + unlock(&sched.lock) } } diff --git a/src/sync/runtime2.go b/src/sync/runtime2.go index 931edad9f1c..f10c4e8e0ef 100644 --- a/src/sync/runtime2.go +++ b/src/sync/runtime2.go @@ -1,3 +1,7 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // +build !goexperiment.staticlockranking package sync diff --git a/src/sync/runtime2_lockrank.go b/src/sync/runtime2_lockrank.go index 5a68e901fa8..aaa1c276261 100644 --- a/src/sync/runtime2_lockrank.go +++ b/src/sync/runtime2_lockrank.go @@ -1,3 +1,7 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // +build goexperiment.staticlockranking package sync diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index 23d7343d3ad..b7351cda823 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -465,7 +465,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att } _, _, err1 = RawSyscall(SYS_DUP3, uintptr(fd[i]), uintptr(nextfd), O_CLOEXEC) if _SYS_dup != SYS_DUP3 && err1 == ENOSYS { - _, _, err1 = RawSyscall(_SYS_dup, uintptr(pipe), uintptr(nextfd), 0) + _, _, err1 = RawSyscall(_SYS_dup, uintptr(fd[i]), uintptr(nextfd), 0) if err1 != 0 { goto childerror } diff --git a/src/testing/testing.go b/src/testing/testing.go index 85da6bb02a1..061142b9abd 100644 --- a/src/testing/testing.go +++ b/src/testing/testing.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // Package testing provides support for automated testing of Go packages. -// It is intended to be used in concert with the ``go test'' command, which automates +// It is intended to be used in concert with the "go test" command, which automates // execution of any function of the form // func TestXxx(*testing.T) // where Xxx does not start with a lowercase letter. The function name @@ -14,8 +14,8 @@ // To write a new test suite, create a file whose name ends _test.go that // contains the TestXxx functions as described here. Put the file in the same // package as the one being tested. The file will be excluded from regular -// package builds but will be included when the ``go test'' command is run. -// For more detail, run ``go help test'' and ``go help testflag''. +// package builds but will be included when the "go test" command is run. +// For more detail, run "go help test" and "go help testflag". // // A simple test function looks like this: // diff --git a/test/codegen/memops.go b/test/codegen/memops.go index cd35910c128..a2342831460 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -354,3 +354,26 @@ func idxCompare(i int) int { } return 1 } + +func idxFloatOps(a []float64, b []float32, i int) (float64, float32) { + c := float64(7) + // amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c += a[i+1] + // amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c -= a[i+2] + // amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c *= a[i+3] + // amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c /= a[i+4] + + d := float32(8) + // amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d += b[i+1] + // amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d -= b[i+2] + // amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d *= b[i+3] + // amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d /= b[i+4] + return c, d +} diff --git a/test/fixedbugs/issue40367.go b/test/fixedbugs/issue40367.go new file mode 100644 index 00000000000..0dc5ad71206 --- /dev/null +++ b/test/fixedbugs/issue40367.go @@ -0,0 +1,41 @@ +// run + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +func case1() { + rates := []int32{1,2,3,4,5,6} + var sink [6]int + j := len(sink) + for star, _ := range rates { + if star+1 < 1 { + panic("") + } + j-- + sink[j] = j + } +} + +func case2() { + i := 0 + var sink [3]int + j := len(sink) +top: + j-- + sink[j] = j + if i < 2 { + i++ + if i < 1 { + return + } + goto top + } +} + +func main() { + case1() + case2() +} \ No newline at end of file diff --git a/test/prove.go b/test/prove.go index d37021d2830..3c19c513b65 100644 --- a/test/prove.go +++ b/test/prove.go @@ -670,7 +670,8 @@ func oforuntil(b []int) { i := 0 if len(b) > i { top: - println(b[i]) // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$" + // TODO: remove the todo of next line once we complete the following optimization of CL 244579 + // println(b[i]) // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$" i++ if i < len(b) { goto top @@ -720,7 +721,8 @@ func range1(b []int) { // range2 elements are larger, so they use the general form of a range loop. func range2(b [][32]int) { for i, v := range b { - b[i][0] = v[0] + 1 // ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$" + // TODO: remove the todo of next line once we complete the following optimization of CL 244579 + b[i][0] = v[0] + 1 // todo: ERROR "Induction variable: limits \[0,\?\), increment 1$" "Proved IsInBounds$" if i < len(b) { // ERROR "Proved Less64$" println("x") }