runtime: introduce a mallocgc fast path

If debug.malloc or gcBlackenEnabled are true or there is a secret, fall
back to a partially-specialized malloc function so we can shorten the
fast path function.

Some of the consts have been turned into vars but I've verified that the
compiler is still able to treat them as constants.

goos: linux
goarch: amd64
pkg: runtime
cpu: Intel(R) Xeon(R) Platinum 8481C CPU @ 2.70GHz
                                               │ 1_mqzmwsxo_11cd032288cb_runtime__mkmalloc__allow_for_folding_const_bool_exprs.txt │ 2_kpsrxxml_25246d9fd438_runtime__introduce_a_mallocgc_fast_path.txt │
                                               │                                      sec/op                                       │                   sec/op                     vs base                │
Mallocgc/scan=noscan/size=1/kind=new-88                                                                                5.935n ± 0%                                   4.848n ± 0%  -18.32% (p=0.000 n=10)
Mallocgc/scan=noscan/size=1/kind=mallocgc-88                                                                           7.610n ± 0%                                   7.791n ± 0%   +2.39% (p=0.000 n=10)
Mallocgc/scan=noscan/size=2/kind=new-88                                                                                5.979n ± 0%                                   5.303n ± 0%  -11.31% (p=0.000 n=10)
Mallocgc/scan=noscan/size=2/kind=mallocgc-88                                                                           7.373n ± 0%                                   7.004n ± 0%   -5.00% (p=0.000 n=10)
Mallocgc/scan=noscan/size=3/kind=new-88                                                                                7.418n ± 0%                                   6.432n ± 0%  -13.29% (p=0.000 n=10)
Mallocgc/scan=noscan/size=3/kind=mallocgc-88                                                                           8.513n ± 0%                                   8.188n ± 0%   -3.83% (p=0.000 n=10)
Mallocgc/scan=noscan/size=4/kind=new-88                                                                                7.918n ± 2%                                   7.162n ± 0%   -9.54% (p=0.000 n=10)
Mallocgc/scan=noscan/size=4/kind=mallocgc-88                                                                           8.907n ± 0%                                   9.048n ± 1%   +1.59% (p=0.000 n=10)
Mallocgc/scan=noscan/size=5/kind=new-88                                                                                8.828n ± 0%                                   8.335n ± 1%   -5.58% (p=0.000 n=10)
Mallocgc/scan=noscan/size=5/kind=mallocgc-88                                                                           9.564n ± 0%                                   9.944n ± 0%   +3.97% (p=0.000 n=10)
Mallocgc/scan=noscan/size=6/kind=new-88                                                                               10.340n ± 0%                                   9.640n ± 0%   -6.76% (p=0.000 n=10)
Mallocgc/scan=noscan/size=6/kind=mallocgc-88                                                                           11.31n ± 0%                                   11.76n ± 0%   +3.93% (p=0.000 n=10)
Mallocgc/scan=noscan/size=7/kind=new-88                                                                               10.805n ± 0%                                   9.853n ± 0%   -8.82% (p=0.000 n=10)
Mallocgc/scan=noscan/size=7/kind=mallocgc-88                                                                           11.39n ± 1%                                   11.94n ± 0%   +4.78% (p=0.000 n=10)
Mallocgc/scan=noscan/size=8/kind=new-88                                                                                9.976n ± 0%                                   9.204n ± 0%   -7.74% (p=0.000 n=10)
Mallocgc/scan=noscan/size=8/kind=mallocgc-88                                                                           10.97n ± 1%                                   11.24n ± 0%   +2.41% (p=0.000 n=10)
Mallocgc/scan=noscan/size=9/kind=new-88                                                                                15.23n ± 0%                                   14.74n ± 1%   -3.18% (p=0.000 n=10)
Mallocgc/scan=noscan/size=9/kind=mallocgc-88                                                                           16.36n ± 1%                                   17.14n ± 0%   +4.80% (p=0.000 n=10)
Mallocgc/scan=noscan/size=10/kind=new-88                                                                               14.99n ± 1%                                   15.23n ± 0%   +1.57% (p=0.000 n=10)
Mallocgc/scan=noscan/size=10/kind=mallocgc-88                                                                          16.20n ± 0%                                   17.65n ± 2%   +8.92% (p=0.000 n=10)
Mallocgc/scan=noscan/size=11/kind=new-88                                                                               15.19n ± 0%                                   14.72n ± 0%   -3.13% (p=0.000 n=10)
Mallocgc/scan=noscan/size=11/kind=mallocgc-88                                                                          16.39n ± 0%                                   17.13n ± 0%   +4.51% (p=0.000 n=10)
Mallocgc/scan=noscan/size=12/kind=new-88                                                                               15.08n ± 1%                                   14.59n ± 0%   -3.25% (p=0.000 n=10)
Mallocgc/scan=noscan/size=12/kind=mallocgc-88                                                                          16.61n ± 0%                                   18.32n ± 0%  +10.23% (p=0.000 n=10)
Mallocgc/scan=noscan/size=13/kind=new-88                                                                               16.61n ± 0%                                   14.68n ± 1%  -11.62% (p=0.000 n=10)
Mallocgc/scan=noscan/size=13/kind=mallocgc-88                                                                          16.46n ± 0%                                   17.20n ± 1%   +4.43% (p=0.000 n=10)
Mallocgc/scan=noscan/size=14/kind=new-88                                                                               16.14n ± 0%                                   14.42n ± 0%  -10.68% (p=0.000 n=10)
Mallocgc/scan=noscan/size=14/kind=mallocgc-88                                                                          16.78n ± 1%                                   17.27n ± 1%   +2.92% (p=0.000 n=10)
Mallocgc/scan=noscan/size=15/kind=new-88                                                                               15.76n ± 4%                                   14.96n ± 1%   -5.11% (p=0.000 n=10)
Mallocgc/scan=noscan/size=15/kind=mallocgc-88                                                                          17.02n ± 1%                                   17.11n ± 0%        ~ (p=0.254 n=10)
Mallocgc/scan=noscan/size=16/kind=new-88                                                                               14.37n ± 0%                                   13.61n ± 0%   -5.29% (p=0.000 n=10)
Mallocgc/scan=noscan/size=16/kind=mallocgc-88                                                                          15.54n ± 0%                                   15.01n ± 0%   -3.44% (p=0.000 n=10)
Mallocgc/scan=noscan/size=24/kind=new-88                                                                               17.32n ± 0%                                   16.12n ± 1%   -6.93% (p=0.000 n=10)
Mallocgc/scan=noscan/size=24/kind=mallocgc-88                                                                          18.73n ± 1%                                   17.78n ± 1%   -5.05% (p=0.000 n=10)
Mallocgc/scan=noscan/size=32/kind=new-88                                                                               19.31n ± 1%                                   18.84n ± 1%   -2.49% (p=0.000 n=10)
Mallocgc/scan=noscan/size=32/kind=mallocgc-88                                                                          20.66n ± 1%                                   20.14n ± 1%   -2.54% (p=0.000 n=10)
Mallocgc/scan=noscan/size=48/kind=new-88                                                                               25.30n ± 1%                                   24.33n ± 1%   -3.85% (p=0.000 n=10)
Mallocgc/scan=noscan/size=48/kind=mallocgc-88                                                                          26.33n ± 1%                                   25.46n ± 1%   -3.30% (p=0.000 n=10)
Mallocgc/scan=noscan/size=64/kind=new-88                                                                               30.68n ± 1%                                   29.88n ± 1%   -2.64% (p=0.000 n=10)
Mallocgc/scan=noscan/size=64/kind=mallocgc-88                                                                          31.61n ± 2%                                   30.87n ± 1%   -2.34% (p=0.000 n=10)
Mallocgc/scan=noscan/size=80/kind=new-88                                                                               36.79n ± 1%                                   36.39n ± 2%   -1.10% (p=0.000 n=10)
Mallocgc/scan=noscan/size=80/kind=mallocgc-88                                                                          37.58n ± 1%                                   37.19n ± 1%   -1.01% (p=0.001 n=10)
Mallocgc/scan=noscan/size=96/kind=new-88                                                                               43.05n ± 1%                                   42.66n ± 2%   -0.93% (p=0.012 n=10)
Mallocgc/scan=noscan/size=96/kind=mallocgc-88                                                                          43.95n ± 1%                                   42.88n ± 1%   -2.43% (p=0.000 n=10)
Mallocgc/scan=noscan/size=112/kind=new-88                                                                              49.27n ± 1%                                   48.20n ± 1%   -2.18% (p=0.000 n=10)
Mallocgc/scan=noscan/size=112/kind=mallocgc-88                                                                         49.60n ± 1%                                   48.98n ± 1%   -1.24% (p=0.001 n=10)
Mallocgc/scan=noscan/size=128/kind=new-88                                                                              55.36n ± 1%                                   54.47n ± 1%   -1.61% (p=0.002 n=10)
Mallocgc/scan=noscan/size=128/kind=mallocgc-88                                                                         55.86n ± 1%                                   54.61n ± 1%   -2.24% (p=0.001 n=10)
Mallocgc/scan=noscan/size=144/kind=new-88                                                                              64.25n ± 2%                                   63.83n ± 1%   -0.65% (p=0.035 n=10)
Mallocgc/scan=noscan/size=144/kind=mallocgc-88                                                                         64.56n ± 1%                                   63.58n ± 1%   -1.53% (p=0.009 n=10)
Mallocgc/scan=noscan/size=160/kind=new-88                                                                              71.33n ± 2%                                   70.41n ± 1%   -1.30% (p=0.005 n=10)
Mallocgc/scan=noscan/size=160/kind=mallocgc-88                                                                         70.84n ± 1%                                   69.48n ± 2%   -1.92% (p=0.000 n=10)
Mallocgc/scan=noscan/size=176/kind=new-88                                                                              78.52n ± 1%                                   76.81n ± 1%   -2.18% (p=0.000 n=10)
Mallocgc/scan=noscan/size=176/kind=mallocgc-88                                                                         78.21n ± 1%                                   76.46n ± 1%   -2.24% (p=0.001 n=10)
Mallocgc/scan=noscan/size=192/kind=new-88                                                                              83.06n ± 1%                                   82.32n ± 1%   -0.89% (p=0.003 n=10)
Mallocgc/scan=noscan/size=192/kind=mallocgc-88                                                                         82.46n ± 1%                                   80.95n ± 1%   -1.83% (p=0.001 n=10)
Mallocgc/scan=noscan/size=208/kind=new-88                                                                              91.20n ± 1%                                   89.90n ± 2%   -1.43% (p=0.000 n=10)
Mallocgc/scan=noscan/size=208/kind=mallocgc-88                                                                         90.59n ± 1%                                   88.98n ± 1%   -1.79% (p=0.000 n=10)
Mallocgc/scan=noscan/size=224/kind=new-88                                                                              95.21n ± 1%                                   93.02n ± 1%   -2.31% (p=0.000 n=10)
Mallocgc/scan=noscan/size=224/kind=mallocgc-88                                                                         95.38n ± 1%                                   92.83n ± 1%   -2.67% (p=0.000 n=10)
Mallocgc/scan=noscan/size=240/kind=new-88                                                                              103.6n ± 1%                                   101.8n ± 1%   -1.74% (p=0.000 n=10)
Mallocgc/scan=noscan/size=240/kind=mallocgc-88                                                                         103.8n ± 1%                                   100.7n ± 1%   -2.99% (p=0.000 n=10)
Mallocgc/scan=noscan/size=256/kind=new-88                                                                              110.5n ± 1%                                   107.4n ± 1%   -2.76% (p=0.000 n=10)
Mallocgc/scan=noscan/size=256/kind=mallocgc-88                                                                         111.6n ± 1%                                   107.7n ± 1%   -3.54% (p=0.000 n=10)
Mallocgc/scan=noscan/size=288/kind=new-88                                                                              121.0n ± 1%                                   118.5n ± 1%   -2.07% (p=0.000 n=10)
Mallocgc/scan=noscan/size=288/kind=mallocgc-88                                                                         121.1n ± 1%                                   117.7n ± 1%   -2.89% (p=0.000 n=10)
Mallocgc/scan=noscan/size=320/kind=new-88                                                                              135.0n ± 1%                                   131.1n ± 1%   -2.89% (p=0.000 n=10)
Mallocgc/scan=noscan/size=320/kind=mallocgc-88                                                                         133.8n ± 1%                                   130.7n ± 1%   -2.39% (p=0.000 n=10)
Mallocgc/scan=noscan/size=352/kind=new-88                                                                              153.2n ± 1%                                   148.8n ± 1%   -2.90% (p=0.000 n=10)
Mallocgc/scan=noscan/size=352/kind=mallocgc-88                                                                         152.0n ± 1%                                   148.0n ± 1%   -2.66% (p=0.000 n=10)
Mallocgc/scan=noscan/size=384/kind=new-88                                                                              158.3n ± 0%                                   154.4n ± 1%   -2.43% (p=0.000 n=10)
Mallocgc/scan=noscan/size=384/kind=mallocgc-88                                                                         158.3n ± 1%                                   153.3n ± 1%   -3.16% (p=0.000 n=10)
Mallocgc/scan=noscan/size=416/kind=new-88                                                                              174.2n ± 1%                                   170.0n ± 2%   -2.38% (p=0.000 n=10)
Mallocgc/scan=noscan/size=416/kind=mallocgc-88                                                                         174.5n ± 1%                                   169.6n ± 1%   -2.81% (p=0.000 n=10)
Mallocgc/scan=noscan/size=448/kind=new-88                                                                              182.3n ± 1%                                   177.2n ± 1%   -2.85% (p=0.000 n=10)
Mallocgc/scan=noscan/size=448/kind=mallocgc-88                                                                         181.8n ± 1%                                   177.7n ± 1%   -2.26% (p=0.000 n=10)
Mallocgc/scan=noscan/size=480/kind=new-88                                                                              205.0n ± 1%                                   198.6n ± 1%   -3.12% (p=0.000 n=10)
Mallocgc/scan=noscan/size=480/kind=mallocgc-88                                                                         205.2n ± 2%                                   200.2n ± 1%   -2.44% (p=0.000 n=10)
Mallocgc/scan=noscan/size=512/kind=new-88                                                                              217.3n ± 1%                                   211.7n ± 1%   -2.58% (p=0.000 n=10)
Mallocgc/scan=noscan/size=512/kind=mallocgc-88                                                                         217.8n ± 1%                                   214.9n ± 2%   -1.33% (p=0.007 n=10)
Mallocgc/scan=scan/size=8/kind=new-88                                                                                  11.63n ± 1%                                   11.21n ± 0%   -3.61% (p=0.000 n=10)
Mallocgc/scan=scan/size=8/kind=mallocgc-88                                                                             13.31n ± 0%                                   12.56n ± 0%   -5.63% (p=0.000 n=10)
Mallocgc/scan=scan/size=16/kind=new-88                                                                                 15.58n ± 0%                                   15.02n ± 3%   -3.63% (p=0.001 n=10)
Mallocgc/scan=scan/size=16/kind=mallocgc-88                                                                            16.95n ± 0%                                   16.20n ± 0%   -4.43% (p=0.000 n=10)
Mallocgc/scan=scan/size=24/kind=new-88                                                                                 19.78n ± 0%                                   18.33n ± 1%   -7.31% (p=0.000 n=10)
Mallocgc/scan=scan/size=24/kind=mallocgc-88                                                                            21.03n ± 0%                                   19.36n ± 0%   -7.94% (p=0.000 n=10)
Mallocgc/scan=scan/size=32/kind=new-88                                                                                 21.33n ± 1%                                   20.46n ± 0%   -4.08% (p=0.000 n=10)
Mallocgc/scan=scan/size=32/kind=mallocgc-88                                                                            22.54n ± 1%                                   21.56n ± 1%   -4.37% (p=0.000 n=10)
Mallocgc/scan=scan/size=48/kind=new-88                                                                                 28.44n ± 1%                                   26.74n ± 0%   -5.96% (p=0.000 n=10)
Mallocgc/scan=scan/size=48/kind=mallocgc-88                                                                            29.66n ± 1%                                   27.89n ± 1%   -5.95% (p=0.000 n=10)
Mallocgc/scan=scan/size=64/kind=new-88                                                                                 32.30n ± 1%                                   31.44n ± 1%   -2.69% (p=0.000 n=10)
Mallocgc/scan=scan/size=64/kind=mallocgc-88                                                                            33.40n ± 1%                                   32.15n ± 1%   -3.73% (p=0.000 n=10)
Mallocgc/scan=scan/size=80/kind=new-88                                                                                 39.29n ± 1%                                   38.26n ± 1%   -2.60% (p=0.000 n=10)
Mallocgc/scan=scan/size=80/kind=mallocgc-88                                                                            40.63n ± 1%                                   39.55n ± 1%   -2.65% (p=0.000 n=10)
Mallocgc/scan=scan/size=96/kind=new-88                                                                                 46.17n ± 1%                                   44.06n ± 1%   -4.58% (p=0.000 n=10)
Mallocgc/scan=scan/size=96/kind=mallocgc-88                                                                            46.75n ± 1%                                   45.33n ± 1%   -3.05% (p=0.000 n=10)
Mallocgc/scan=scan/size=112/kind=new-88                                                                                51.73n ± 1%                                   50.41n ± 1%   -2.54% (p=0.000 n=10)
Mallocgc/scan=scan/size=112/kind=mallocgc-88                                                                           52.17n ± 1%                                   50.71n ± 1%   -2.79% (p=0.000 n=10)
Mallocgc/scan=scan/size=128/kind=new-88                                                                                57.70n ± 1%                                   55.37n ± 1%   -4.03% (p=0.000 n=10)
Mallocgc/scan=scan/size=128/kind=mallocgc-88                                                                           57.74n ± 1%                                   56.22n ± 1%   -2.63% (p=0.000 n=10)
Mallocgc/scan=scan/size=144/kind=new-88                                                                                68.42n ± 1%                                   66.92n ± 1%   -2.19% (p=0.000 n=10)
Mallocgc/scan=scan/size=144/kind=mallocgc-88                                                                           67.12n ± 1%                                   66.14n ± 2%   -1.45% (p=0.000 n=10)
Mallocgc/scan=scan/size=160/kind=new-88                                                                                75.02n ± 2%                                   73.44n ± 1%   -2.11% (p=0.000 n=10)
Mallocgc/scan=scan/size=160/kind=mallocgc-88                                                                           74.08n ± 1%                                   72.42n ± 1%   -2.23% (p=0.000 n=10)
Mallocgc/scan=scan/size=176/kind=new-88                                                                                79.69n ± 1%                                   77.68n ± 2%   -2.52% (p=0.000 n=10)
Mallocgc/scan=scan/size=176/kind=mallocgc-88                                                                           79.82n ± 1%                                   78.87n ± 2%        ~ (p=0.089 n=10)
Mallocgc/scan=scan/size=192/kind=new-88                                                                                86.32n ± 1%                                   84.97n ± 2%   -1.56% (p=0.004 n=10)
Mallocgc/scan=scan/size=192/kind=mallocgc-88                                                                           85.43n ± 1%                                   84.10n ± 1%   -1.55% (p=0.000 n=10)
Mallocgc/scan=scan/size=208/kind=new-88                                                                                92.27n ± 1%                                   90.84n ± 1%   -1.55% (p=0.001 n=10)
Mallocgc/scan=scan/size=208/kind=mallocgc-88                                                                           92.55n ± 1%                                   90.42n ± 1%   -2.30% (p=0.000 n=10)
Mallocgc/scan=scan/size=224/kind=new-88                                                                               100.90n ± 1%                                   97.52n ± 1%   -3.35% (p=0.000 n=10)
Mallocgc/scan=scan/size=224/kind=mallocgc-88                                                                           99.86n ± 2%                                   96.97n ± 1%   -2.90% (p=0.000 n=10)
Mallocgc/scan=scan/size=240/kind=new-88                                                                                105.8n ± 1%                                   101.7n ± 1%   -3.78% (p=0.000 n=10)
Mallocgc/scan=scan/size=240/kind=mallocgc-88                                                                           105.5n ± 1%                                   102.1n ± 2%   -3.18% (p=0.000 n=10)
Mallocgc/scan=scan/size=256/kind=new-88                                                                                112.3n ± 1%                                   107.4n ± 1%   -4.32% (p=0.000 n=10)
Mallocgc/scan=scan/size=256/kind=mallocgc-88                                                                           111.9n ± 1%                                   107.8n ± 1%   -3.66% (p=0.000 n=10)
Mallocgc/scan=scan/size=288/kind=new-88                                                                                127.1n ± 1%                                   123.4n ± 0%   -2.95% (p=0.000 n=10)
Mallocgc/scan=scan/size=288/kind=mallocgc-88                                                                           127.8n ± 2%                                   123.8n ± 1%   -3.17% (p=0.000 n=10)
Mallocgc/scan=scan/size=320/kind=new-88                                                                                142.0n ± 1%                                   138.5n ± 2%   -2.50% (p=0.000 n=10)
Mallocgc/scan=scan/size=320/kind=mallocgc-88                                                                           141.8n ± 1%                                   137.8n ± 1%   -2.82% (p=0.000 n=10)
Mallocgc/scan=scan/size=352/kind=new-88                                                                                153.5n ± 1%                                   149.8n ± 0%   -2.44% (p=0.000 n=10)
Mallocgc/scan=scan/size=352/kind=mallocgc-88                                                                           153.2n ± 1%                                   149.5n ± 1%   -2.38% (p=0.000 n=10)
Mallocgc/scan=scan/size=384/kind=new-88                                                                                168.6n ± 1%                                   164.4n ± 1%   -2.46% (p=0.000 n=10)
Mallocgc/scan=scan/size=384/kind=mallocgc-88                                                                           166.6n ± 1%                                   163.5n ± 1%   -1.83% (p=0.000 n=10)
Mallocgc/scan=scan/size=416/kind=new-88                                                                                174.7n ± 1%                                   171.3n ± 2%   -1.92% (p=0.000 n=10)
Mallocgc/scan=scan/size=416/kind=mallocgc-88                                                                           173.6n ± 1%                                   171.1n ± 1%   -1.44% (p=0.001 n=10)
Mallocgc/scan=scan/size=448/kind=new-88                                                                                193.8n ± 1%                                   191.1n ± 1%   -1.39% (p=0.000 n=10)
Mallocgc/scan=scan/size=448/kind=mallocgc-88                                                                           192.0n ± 2%                                   191.2n ± 1%   -0.44% (p=0.039 n=10)
Mallocgc/scan=scan/size=480/kind=new-88                                                                                204.7n ± 1%                                   201.5n ± 1%   -1.51% (p=0.001 n=10)
Mallocgc/scan=scan/size=480/kind=mallocgc-88                                                                           206.3n ± 1%                                   200.9n ± 1%   -2.62% (p=0.000 n=10)
Mallocgc/scan=scan/size=512/kind=new-88                                                                                218.9n ± 1%                                   212.6n ± 1%   -2.90% (p=0.000 n=10)
Mallocgc/scan=scan/size=512/kind=mallocgc-88                                                                           217.8n ± 1%                                   214.5n ± 1%   -1.54% (p=0.000 n=10)
geomean                                                                                                                47.37n                                        46.07n        -2.73%

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64_c2s16-perf_vs_parent-sizespecializedmalloc,gotip-linux-amd64_c3h88-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4ah72-perf_vs_parent-sizespecializedmalloc,gotip-linux-arm64_c4as16-perf_vs_parent-sizespecializedmalloc
Change-Id: Ifa7822de630e3692f4753086871186af6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/776120
Reviewed-by: Michael Matloob <matloob@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
TryBot-Bypass: Michael Matloob <matloob@golang.org>
Commit-Queue: Michael Matloob <matloob@golang.org>
This commit is contained in:
Michael Matloob 2026-04-30 15:56:31 -04:00
parent 6716b79b58
commit 75560e67c9
3 changed files with 1410 additions and 1312 deletions

View file

@ -114,6 +114,8 @@ const (
inlineFunc = replacementKind(iota)
subBasicLit
foldCondition
subIdent
deleteConst
)
// op is a single inlining operation for the inliner. Any calls to the function
@ -172,10 +174,12 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
{inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"},
{foldCondition, "isSlowPath_", str(false)},
{subBasicLit, "elemsize_", str(elemsize)},
{subBasicLit, "sizeclass_", str(sc)},
{subBasicLit, "noscanint_", str(noscan)},
{foldCondition, "isTiny_", str(false)},
{subIdent, "mallocgcSlowPathStub", "mallocgcSmallScanSlowPath"},
},
})
}
@ -198,7 +202,7 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
{inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"},
{inlineFunc, "postMallocgc", "postMallocgc"},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
{foldCondition, "isSlowPath_", str(false)},
{subBasicLit, "elemsize_", str(elemsize)},
{subBasicLit, "sizeclass_", str(tinySizeClass)},
{subBasicLit, "noscanint_", str(noscan)},
@ -219,15 +223,72 @@ func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generator
{inlineFunc, "postMallocgc", "postMallocgc"},
{foldCondition, "isNoScan_", str(true)},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{foldCondition, "isSlowPath_", str(false)},
{subBasicLit, "elemsize_", str(elemsize)},
{subBasicLit, "sizeclass_", str(sc)},
{subBasicLit, "noscanint_", str(noscan)},
{foldCondition, "isTiny_", str(false)},
{subIdent, "mallocgcSlowPathStub", "mallocgcSmallNoScanSlowPath"},
},
})
}
}
// Non-size-specialized fallbacks in case we can't do the fast path.
config.specs = append(config.specs, spec{
templateFunc: "mallocStub",
name: "mallocgcTinySlowPath",
ops: []op{
{inlineFunc, "inlinedMalloc", "tinyStub"},
{inlineFunc, "postMallocgc", "postMallocgc"},
{inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"},
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
{foldCondition, "isSlowPath_", str(true)},
{foldCondition, "isTiny_", str(true)},
{subBasicLit, "elemsize_", str(classes[sizeToSizeClass[tinySize]].size)},
},
})
config.specs = append(config.specs, spec{
templateFunc: "mallocgcSlowPathStub",
name: "mallocgcSmallScanSlowPath",
ops: []op{
{inlineFunc, "mallocStub", "mallocStub"},
{inlineFunc, "inlinedMalloc", "smallStub"},
{inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"},
{inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"},
{inlineFunc, "postMallocgc", "postMallocgc"},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
{foldCondition, "isSlowPath_", str(true)},
{foldCondition, "isTiny_", str(false)},
{foldCondition, "isNoScan_", str(false)},
// Remove constants used by size-specialized variants.
{deleteConst, "elemsize", ""},
{deleteConst, "sizeclass", ""},
{deleteConst, "spc", ""},
},
})
config.specs = append(config.specs, spec{
templateFunc: "mallocgcSlowPathStub",
name: "mallocgcSmallNoScanSlowPath",
ops: []op{
{inlineFunc, "mallocStub", "mallocStub"},
{inlineFunc, "inlinedMalloc", "smallStub"},
{inlineFunc, "postMallocgc", "postMallocgc"},
{inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
{inlineFunc, "deductAssistCredit", "deductAssistCredit"},
{foldCondition, "isSlowPath_", str(true)},
{foldCondition, "isTiny_", str(false)},
{foldCondition, "isNoScan_", str(true)},
// Remove constants used by size-specialized variants.
{deleteConst, "elemsize", ""},
{deleteConst, "sizeclass", ""},
{deleteConst, "spc", ""},
},
})
return config
}
@ -291,6 +352,10 @@ func inline(config generatorConfig) []byte {
stamped = substituteWithBasicLit(stamped, repl.from, repl.to)
case foldCondition:
stamped = foldIfCondition(stamped, repl.from, repl.to)
case subIdent:
stamped = substituteIdent(stamped, repl.from, repl.to)
case deleteConst:
stamped = deleteConstDecl(stamped, repl.from)
default:
log.Fatalf("unknown op kind %v", repl.kind)
}
@ -306,7 +371,7 @@ func inline(config generatorConfig) []byte {
// substituteWithBasicLit recursively renames identifiers in the provided AST
// according to 'from' and 'to'.
func substituteWithBasicLit(node ast.Node, from, to string) ast.Node {
// The op is a substitution of an identifier with an basic literal.
// The op is a substitution of an identifier with a basic literal.
toExpr, err := parser.ParseExpr(to)
if err != nil {
log.Fatalf("parsing expr %q: %v", to, err)
@ -325,6 +390,16 @@ func substituteWithBasicLit(node ast.Node, from, to string) ast.Node {
}, nil)
}
// substituteIdent rewrites every identifier named 'from' in the provided AST
// so that it is named 'to' instead. Each match is swapped for a fresh
// *ast.Ident that keeps the source position of the identifier it replaces.
func substituteIdent(node ast.Node, from, to string) ast.Node {
	rename := func(c *astutil.Cursor) bool {
		old, isIdent := c.Node().(*ast.Ident)
		if !isIdent || old.Name != from {
			// Not a match; keep walking.
			return true
		}
		c.Replace(&ast.Ident{NamePos: old.NamePos, Name: to})
		return true
	}
	return astutil.Apply(node, rename, nil)
}
// foldIfCondition replaces 'from' with 'to', which must be "true" or "false".
// It then applies simplifications to any boolean expressions that have literal
// true or false values, from the bottom up. Any if statements that have a condition
@ -397,6 +472,40 @@ func foldIfCondition(node ast.Node, from, to string) ast.Node {
return astutil.Apply(node, nil, handleIfs)
}
// isNamedConstDecl reports whether node is a declaration statement holding a
// const declaration with exactly one spec, where that spec declares the single
// name 'name' and carries exactly one value.
func isNamedConstDecl(node ast.Node, name string) bool {
	stmt, isDeclStmt := node.(*ast.DeclStmt)
	if !isDeclStmt {
		return false
	}
	decl, isGenDecl := stmt.Decl.(*ast.GenDecl)
	if !isGenDecl || decl.Tok != token.CONST || len(decl.Specs) != 1 {
		return false
	}
	spec, isValueSpec := decl.Specs[0].(*ast.ValueSpec)
	if !isValueSpec {
		return false
	}
	// Only the simple `const name = value` shape qualifies.
	return len(spec.Names) == 1 && len(spec.Values) == 1 && spec.Names[0].Name == name
}
// deleteConstDecl walks the provided AST and deletes every statement that
// isNamedConstDecl accepts for 'name', i.e. a const declaration statement
// made up of a single spec declaring only 'name'. Declarations of 'name'
// in any other shape are left in place.
func deleteConstDecl(node ast.Node, name string) ast.Node {
	drop := func(c *astutil.Cursor) bool {
		if !isNamedConstDecl(c.Node(), name) {
			return true
		}
		c.Delete()
		return true
	}
	return astutil.Apply(node, drop, nil)
}
// inlineFunction recursively replaces calls to the function 'from' with the body of the function
// 'toDecl'. All calls to 'from' must either have no return values and appear in standalone expression statements
// or otherwise must appear in assignment statements.

File diff suppressed because it is too large Load diff

View file

@ -39,6 +39,7 @@ const noscanint_ = 0
const isNoScan_ = false
const size_ = 0
const isTiny_ = false
const isSlowPath_ = false
func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if doubleCheckMalloc {
@ -55,11 +56,14 @@ func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
panic("not defined for sizeclass")
}
// mallocgcSlowPathStub forwards size, typ, and needzero to mallocStub and
// returns its result. The spc and elemsize parameters are not referenced in
// this body.
//
// NOTE(review): presumably spc and elemsize are here so that, once the
// generator inlines mallocStub into this template, the inlined code can refer
// to these parameters in place of size-class-specific constants -- confirm
// against the mkmalloc specs that use this function as their template.
func mallocgcSlowPathStub(size uintptr, typ *_type, needzero bool, spc spanClass, elemsize uintptr) unsafe.Pointer {
return mallocStub(size, typ, needzero)
}
// WARNING: mallocStub does not do any work for sanitizers so callers need
// to steer out of this codepath early if sanitizers are enabled.
func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if isTiny_ {
if isSlowPath_ && isTiny_ {
// secret code, need to avoid the tiny allocator since it might keep
// co-located values alive longer and prevent timely zero-ing
//
@ -70,6 +74,20 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
return mallocgcSmallNoScanSC2(size, typ, needzero)
}
}
if !isSlowPath_ {
forceSlowPath := debug.malloc || gcBlackenEnabled != 0 || (goexperiment.RuntimeSecret && getg().secret > 0)
if forceSlowPath {
if isTiny_ {
return mallocgcTinySlowPath(size, typ, needzero)
}
const spc = spanClass(sizeclass_<<1) | spanClass(noscanint_)
const elemsize = uintptr(elemsize_)
return mallocgcSlowPathStub(size, typ, needzero, spc, elemsize)
}
}
if doubleCheckMalloc {
if gcphase == _GCmarktermination {
throw("mallocgc called with gcphase == _GCmarktermination")
@ -82,7 +100,7 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
lockRankMayQueueFinalizer()
// Pre-malloc debug hooks.
if debug.malloc {
if isSlowPath_ && debug.malloc {
if x := preMallocgcDebug(size, typ); x != nil {
return x
}
@ -90,7 +108,7 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// Assist the GC if needed. (On the reuse path, we currently compensate for this;
// changes here might require changes there.)
if gcBlackenEnabled != 0 {
if isSlowPath_ && gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
@ -99,7 +117,7 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
func postMallocgc(x unsafe.Pointer, typ *_type, size uintptr, elemsize uintptr) {
if !isTiny_ {
if isSlowPath_ && !isTiny_ {
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
// Mark any object allocated while in secret mode as secret.
@ -109,14 +127,14 @@ func postMallocgc(x unsafe.Pointer, typ *_type, size uintptr, elemsize uintptr)
}
// Adjust our GC assist debt to account for internal fragmentation.
if gcBlackenEnabled != 0 && elemsize != 0 {
if isSlowPath_ && gcBlackenEnabled != 0 && elemsize != 0 {
if assistG := getg().m.curg; assistG != nil {
assistG.gcAssistBytes -= int64(elemsize - size)
}
}
// Post-malloc debug hooks.
if debug.malloc {
if isSlowPath_ && debug.malloc {
postMallocgcDebug(x, elemsize, typ)
}
}
@ -204,7 +222,7 @@ func smallStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
v := nextFreeFastStub(span)
v := nextFreeFastStub(span, elemsize)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
@ -218,7 +236,7 @@ func smallStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if span.needzero != 0 {
memclrNoHeapPointers(x, elemsize)
}
if goarch.PtrSize == 8 && sizeclass == 1 {
if goarch.PtrSize == 8 && elemsize == 8 {
// initHeapBits already set the pointer bits for the 8-byte sizeclass
// on 64-bit platforms.
c.scanAlloc += 8
@ -440,7 +458,6 @@ func tinyStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
gcStart(t)
}
}
postMallocgc(x, typ, size, elemsize)
return x
@ -468,7 +485,7 @@ func nextFreeFastTiny(span *mspan) gclinkptr {
return nextFreeFastResult
}
func nextFreeFastStub(span *mspan) gclinkptr {
func nextFreeFastStub(span *mspan, elemsize uintptr) gclinkptr {
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
@ -479,7 +496,7 @@ func nextFreeFastStub(span *mspan) gclinkptr {
span.allocCache >>= uint(theBit + 1)
span.freeindex = freeidx
span.allocCount++
nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
nextFreeFastResult = gclinkptr(uintptr(result)*elemsize + span.base())
}
}
}
@ -538,7 +555,7 @@ func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintpt
o := (x - span.base()) / goarch.PtrSize
i := o / ptrBits
j := o % ptrBits
const bits uintptr = elemsize / goarch.PtrSize
var bits uintptr = elemsize / goarch.PtrSize
// In the if statement below, we have to do two uintptr writes if the bits
// we need to write straddle across two different memory locations. But if
// the number of bits we're writing divides evenly into the number of bits
@ -547,7 +564,7 @@ func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintpt
// a power of two less than or equal to ptrBits, the compiler can remove the
// 'two writes' branch of the if statement and always do only one write without
// the check.
const bitsIsPowerOfTwo = bits&(bits-1) == 0
var bitsIsPowerOfTwo = bits&(bits-1) == 0
if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
// Two writes.
bits0 := ptrBits - j