mime: reduce allocs incurred by ParseMediaType

This change is mostly gardening. It simplifies ParseMediaType and its
helper functions and reduces the number of allocations they incur.

Here are some benchmark results:

goos: darwin
goarch: amd64
pkg: mime
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                      │     old     │                new                 │
                      │   sec/op    │   sec/op     vs base               │
ParseMediaType-8        55.26µ ± 1%   54.54µ ± 1%  -1.30% (p=0.000 n=20)
ParseMediaTypeBogus-8   3.551µ ± 0%   3.427µ ± 0%  -3.48% (p=0.000 n=20)
geomean                 14.01µ        13.67µ       -2.39%

                      │     old      │                 new                 │
                      │     B/op     │     B/op      vs base               │
ParseMediaType-8        38.48Ki ± 0%   37.38Ki ± 0%  -2.85% (p=0.000 n=20)
ParseMediaTypeBogus-8   2.531Ki ± 0%   2.469Ki ± 0%  -2.47% (p=0.000 n=20)
geomean                 9.869Ki        9.606Ki       -2.66%

                      │    old     │                new                 │
                      │ allocs/op  │ allocs/op   vs base                │
ParseMediaType-8        457.0 ± 0%   425.0 ± 0%   -7.00% (p=0.000 n=20)
ParseMediaTypeBogus-8   25.00 ± 0%   21.00 ± 0%  -16.00% (p=0.000 n=20)
geomean                 106.9        94.47       -11.62%

Change-Id: I51198b40396afa51531794a57c50aa88975eae1d
GitHub-Last-Rev: c44e2a2577
GitHub-Pull-Request: golang/go#75565
Reviewed-on: https://go-review.googlesource.com/c/go/+/705715
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Sean Liao <sean@liao.dev>
Reviewed-by: Damien Neil <dneil@google.com>
Auto-Submit: Emmanuel Odeke <emmanuel@orijtech.com>
This commit is contained in:
Julien Cretel 2025-09-22 18:05:12 +00:00 committed by Gopher Robot
parent 08afc50bea
commit 19cc1022ba

View file

@ -98,24 +98,32 @@ func FormatMediaType(t string, param map[string]string) string {
func checkMediaTypeDisposition(s string) error {
typ, rest := consumeToken(s)
if typ == "" {
return errors.New("mime: no media type")
return errNoMediaType
}
if rest == "" {
return nil
}
if !strings.HasPrefix(rest, "/") {
return errors.New("mime: expected slash after first token")
var ok bool
if rest, ok = strings.CutPrefix(rest, "/"); !ok {
return errNoSlashAfterFirstToken
}
subtype, rest := consumeToken(rest[1:])
subtype, rest := consumeToken(rest)
if subtype == "" {
return errors.New("mime: expected token after slash")
return errNoTokenAfterSlash
}
if rest != "" {
return errors.New("mime: unexpected content after media subtype")
return errUnexpectedContentAfterMediaSubtype
}
return nil
}
var (
errNoMediaType = errors.New("mime: no media type")
errNoSlashAfterFirstToken = errors.New("mime: expected slash after first token")
errNoTokenAfterSlash = errors.New("mime: expected token after slash")
errUnexpectedContentAfterMediaSubtype = errors.New("mime: unexpected content after media subtype")
)
// ErrInvalidMediaParameter is returned by [ParseMediaType] if
// the media type value was found but there was an error parsing
// the optional parameters
@ -169,7 +177,6 @@ func ParseMediaType(v string) (mediatype string, params map[string]string, err e
if continuation == nil {
continuation = make(map[string]map[string]string)
}
var ok bool
if pmap, ok = continuation[baseName]; !ok {
continuation[baseName] = make(map[string]string)
pmap = continuation[baseName]
@ -177,7 +184,7 @@ func ParseMediaType(v string) (mediatype string, params map[string]string, err e
}
if v, exists := pmap[key]; exists && v != value {
// Duplicate parameter names are incorrect, but we allow them if they are equal.
return "", nil, errors.New("mime: duplicate parameter name")
return "", nil, errDuplicateParamName
}
pmap[key] = value
v = rest
@ -227,27 +234,28 @@ func ParseMediaType(v string) (mediatype string, params map[string]string, err e
return
}
var errDuplicateParamName = errors.New("mime: duplicate parameter name")
func decode2231Enc(v string) (string, bool) {
sv := strings.SplitN(v, "'", 3)
if len(sv) != 3 {
charset, v, ok := strings.Cut(v, "'")
if !ok {
return "", false
}
// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
// TODO: ignoring the language part for now. If anybody needs it, we'll
// need to decide how to expose it in the API. But I'm not sure
// anybody uses it in practice.
charset := strings.ToLower(sv[0])
if len(charset) == 0 {
_, extOtherVals, ok := strings.Cut(v, "'")
if !ok {
return "", false
}
if charset != "us-ascii" && charset != "utf-8" {
// TODO: unsupported encoding
charset = strings.ToLower(charset)
switch charset {
case "us-ascii", "utf-8":
default:
// Empty or unsupported encoding.
return "", false
}
encv, err := percentHexUnescape(sv[2])
if err != nil {
return "", false
}
return encv, true
return percentHexUnescape(extOtherVals)
}
// consumeToken consumes a token from the beginning of provided
@ -309,11 +317,11 @@ func consumeValue(v string) (value, rest string) {
func consumeMediaParam(v string) (param, value, rest string) {
rest = strings.TrimLeftFunc(v, unicode.IsSpace)
if !strings.HasPrefix(rest, ";") {
var ok bool
if rest, ok = strings.CutPrefix(rest, ";"); !ok {
return "", "", v
}
rest = rest[1:] // consume semicolon
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
param, rest = consumeToken(rest)
param = strings.ToLower(param)
@ -322,10 +330,9 @@ func consumeMediaParam(v string) (param, value, rest string) {
}
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
if !strings.HasPrefix(rest, "=") {
if rest, ok = strings.CutPrefix(rest, "="); !ok {
return "", "", v
}
rest = rest[1:] // consume equals sign
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
value, rest2 := consumeValue(rest)
if value == "" && rest2 == rest {
@ -335,7 +342,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
return param, value, rest
}
func percentHexUnescape(s string) (string, error) {
func percentHexUnescape(s string) (string, bool) {
// Count %, check that they're well-formed.
percents := 0
for i := 0; i < len(s); {
@ -345,16 +352,12 @@ func percentHexUnescape(s string) (string, error) {
}
percents++
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
s = s[i:]
if len(s) > 3 {
s = s[0:3]
}
return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
return "", false
}
i += 3
}
if percents == 0 {
return s, nil
return s, true
}
t := make([]byte, len(s)-2*percents)
@ -371,7 +374,7 @@ func percentHexUnescape(s string) (string, error) {
i++
}
}
return string(t), nil
return string(t), true
}
func ishex(c byte) bool {