mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
net/url: speed up escape and unescape
This change adds a generated 8-bit bitmask for use in functions shouldEscape and ishex.
Function shouldEscape is now inlineable. Function escape is now much faster;
function unescape is a bit faster. Here are some benchmark results (no change
to allocations):
goos: darwin
goarch: amd64
pkg: net/url
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
│ old │ new │
│ sec/op │ sec/op vs base │
QueryEscape/#00-8 58.38n ± 1% 35.98n ± 1% -38.38% (p=0.000 n=20)
QueryEscape/#01-8 303.50n ± 0% 94.77n ± 0% -68.77% (p=0.000 n=20)
QueryEscape/#02-8 202.90n ± 0% 78.66n ± 1% -61.23% (p=0.000 n=20)
QueryEscape/#03-8 444.5n ± 0% 145.9n ± 0% -67.17% (p=0.000 n=20)
QueryEscape/#04-8 2678.0n ± 0% 913.7n ± 0% -65.88% (p=0.000 n=20)
PathEscape/#00-8 81.34n ± 0% 44.64n ± 1% -45.12% (p=0.000 n=20)
PathEscape/#01-8 307.65n ± 0% 96.71n ± 1% -68.56% (p=0.000 n=20)
PathEscape/#02-8 200.80n ± 1% 78.25n ± 0% -61.03% (p=0.000 n=20)
PathEscape/#03-8 450.1n ± 1% 145.5n ± 0% -67.67% (p=0.000 n=20)
PathEscape/#04-8 2663.5n ± 0% 876.5n ± 0% -67.09% (p=0.000 n=20)
QueryUnescape/#00-8 53.32n ± 1% 51.67n ± 1% -3.09% (p=0.000 n=20)
QueryUnescape/#01-8 161.0n ± 1% 136.2n ± 1% -15.40% (p=0.000 n=20)
QueryUnescape/#02-8 126.1n ± 1% 118.3n ± 1% -6.23% (p=0.000 n=20)
QueryUnescape/#03-8 294.6n ± 0% 273.1n ± 0% -7.30% (p=0.000 n=20)
QueryUnescape/#04-8 1.511µ ± 0% 1.411µ ± 0% -6.62% (p=0.000 n=20)
PathUnescape/#00-8 63.84n ± 1% 53.59n ± 1% -16.05% (p=0.000 n=20)
PathUnescape/#01-8 163.6n ± 3% 137.9n ± 1% -15.71% (p=0.000 n=20)
PathUnescape/#02-8 126.4n ± 1% 119.1n ± 1% -5.78% (p=0.000 n=20)
PathUnescape/#03-8 294.2n ± 0% 273.3n ± 0% -7.12% (p=0.000 n=20)
PathUnescape/#04-8 1.554µ ± 0% 1.417µ ± 0% -8.78% (p=0.000 n=20)
geomean 277.8n 162.7n -41.44%
This change draws heavy inspiration from CL 174998, which showed promise but stalled years ago.
Updates #17860
Change-Id: Idcbb1696608998b9e2fc91e1f2a488d8f1f6028c
GitHub-Last-Rev: ff360c2f1b
GitHub-Pull-Request: golang/go#75914
Reviewed-on: https://go-review.googlesource.com/c/go/+/712200
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Takuto Nagami <logica0419@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
parent
3f6ac3a10f
commit
c4e910895b
3 changed files with 354 additions and 100 deletions
114
src/net/url/encoding_table.go
Normal file
114
src/net/url/encoding_table.go
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.
|
||||
|
||||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package url
|
||||
|
||||
type encoding uint8
|
||||
|
||||
const (
|
||||
encodePath encoding = 1 << iota
|
||||
encodePathSegment
|
||||
encodeHost
|
||||
encodeZone
|
||||
encodeUserPassword
|
||||
encodeQueryComponent
|
||||
encodeFragment
|
||||
|
||||
// hexChar is actually NOT an encoding mode, but there are only seven
|
||||
// encoding modes. We might as well abuse the otherwise unused most
|
||||
// significant bit in uint8 to indicate whether a character is
|
||||
// hexadecimal.
|
||||
hexChar
|
||||
)
|
||||
|
||||
var table = [256]encoding{
|
||||
'!': encodeFragment | encodeZone | encodeHost,
|
||||
'"': encodeZone | encodeHost,
|
||||
'$': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'&': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'\'': encodeZone | encodeHost,
|
||||
'(': encodeFragment | encodeZone | encodeHost,
|
||||
')': encodeFragment | encodeZone | encodeHost,
|
||||
'*': encodeFragment | encodeZone | encodeHost,
|
||||
'+': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
',': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
|
||||
'-': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'.': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'/': encodeFragment | encodePath,
|
||||
'0': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'1': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'2': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'3': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'4': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'5': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'6': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'7': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'8': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'9': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
':': encodeFragment | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
';': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
|
||||
'<': encodeZone | encodeHost,
|
||||
'=': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'>': encodeZone | encodeHost,
|
||||
'?': encodeFragment,
|
||||
'@': encodeFragment | encodePathSegment | encodePath,
|
||||
'A': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'B': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'C': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'D': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'E': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'F': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'G': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'H': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'I': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'J': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'K': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'L': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'M': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'N': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'O': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'P': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'Q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'R': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'S': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'T': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'U': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'V': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'W': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'X': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'Y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'Z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'[': encodeZone | encodeHost,
|
||||
']': encodeZone | encodeHost,
|
||||
'_': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'a': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'b': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'c': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'd': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'e': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'f': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'g': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'h': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'i': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'j': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'k': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'l': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'm': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'n': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'o': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'p': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'r': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
's': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
't': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'u': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'v': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'w': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'x': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
'~': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
|
||||
}
|
||||
234
src/net/url/gen_encoding_table.go
Normal file
234
src/net/url/gen_encoding_table.go
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"io"
|
||||
"log"
|
||||
"maps"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// We embed this source file in the resulting code-generation program in order
|
||||
// to extract the definitions of the encoding type and constants from it and
|
||||
// include them in the generated file.
|
||||
//
|
||||
//go:embed gen_encoding_table.go
|
||||
var genSource string
|
||||
|
||||
const filename = "encoding_table.go"
|
||||
|
||||
func main() {
|
||||
var out bytes.Buffer
|
||||
fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.")
|
||||
fmt.Fprintln(&out)
|
||||
fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.")
|
||||
fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style")
|
||||
fmt.Fprintln(&out, "// license that can be found in the LICENSE file.")
|
||||
fmt.Fprintln(&out)
|
||||
fmt.Fprintln(&out, "package url")
|
||||
fmt.Fprintln(&out)
|
||||
generateEnc(&out, genSource)
|
||||
generateTable(&out)
|
||||
|
||||
formatted, err := format.Source(out.Bytes())
|
||||
if err != nil {
|
||||
log.Fatal("format:", err)
|
||||
}
|
||||
|
||||
err = os.WriteFile(filename, formatted, 0644)
|
||||
if err != nil {
|
||||
log.Fatal("WriteFile:", err)
|
||||
}
|
||||
}
|
||||
|
||||
func generateEnc(w io.Writer, src string) {
|
||||
var writeLine bool
|
||||
for line := range strings.Lines(src) {
|
||||
if strings.HasPrefix(line, "// START encoding") {
|
||||
writeLine = true
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "// END encoding") {
|
||||
return
|
||||
}
|
||||
if writeLine {
|
||||
fmt.Fprint(w, line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func generateTable(w io.Writer) {
|
||||
fmt.Fprintln(w, "var table = [256]encoding{")
|
||||
|
||||
// Sort the encodings (in decreasing order) to guarantee a stable output.
|
||||
sortedEncs := slices.Sorted(maps.Keys(encNames))
|
||||
slices.Reverse(sortedEncs)
|
||||
|
||||
for i := range 256 {
|
||||
c := byte(i)
|
||||
var lineBuf bytes.Buffer
|
||||
|
||||
// Write key to line buffer.
|
||||
lineBuf.WriteString(strconv.QuoteRune(rune(c)))
|
||||
|
||||
lineBuf.WriteByte(':')
|
||||
|
||||
// Write value to line buffer.
|
||||
blankVal := true
|
||||
if ishex(c) {
|
||||
// Set the hexChar bit if this char is hexadecimal.
|
||||
lineBuf.WriteString("hexChar")
|
||||
blankVal = false
|
||||
}
|
||||
for _, enc := range sortedEncs {
|
||||
if !shouldEscape(c, enc) {
|
||||
if !blankVal {
|
||||
lineBuf.WriteByte('|')
|
||||
}
|
||||
// Set this encoding mode's bit if this char should NOT be
|
||||
// escaped.
|
||||
name := encNames[enc]
|
||||
lineBuf.WriteString(name)
|
||||
blankVal = false
|
||||
}
|
||||
}
|
||||
|
||||
if !blankVal {
|
||||
lineBuf.WriteString(",\n")
|
||||
w.Write(lineBuf.Bytes())
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
// START encoding (keep this marker comment in sync with generateEnc)
|
||||
type encoding uint8
|
||||
|
||||
const (
|
||||
encodePath encoding = 1 << iota
|
||||
encodePathSegment
|
||||
encodeHost
|
||||
encodeZone
|
||||
encodeUserPassword
|
||||
encodeQueryComponent
|
||||
encodeFragment
|
||||
|
||||
// hexChar is actually NOT an encoding mode, but there are only seven
|
||||
// encoding modes. We might as well abuse the otherwise unused most
|
||||
// significant bit in uint8 to indicate whether a character is
|
||||
// hexadecimal.
|
||||
hexChar
|
||||
)
|
||||
|
||||
// END encoding (keep this marker comment in sync with generateEnc)
|
||||
|
||||
// Keep this in sync with the definitions of encoding mode constants.
|
||||
var encNames = map[encoding]string{
|
||||
encodePath: "encodePath",
|
||||
encodePathSegment: "encodePathSegment",
|
||||
encodeHost: "encodeHost",
|
||||
encodeZone: "encodeZone",
|
||||
encodeUserPassword: "encodeUserPassword",
|
||||
encodeQueryComponent: "encodeQueryComponent",
|
||||
encodeFragment: "encodeFragment",
|
||||
}
|
||||
|
||||
// Return true if the specified character should be escaped when
|
||||
// appearing in a URL string, according to RFC 3986.
|
||||
//
|
||||
// Please be informed that for now shouldEscape does not check all
|
||||
// reserved characters correctly. See golang.org/issue/5684.
|
||||
func shouldEscape(c byte, mode encoding) bool {
|
||||
// §2.3 Unreserved characters (alphanum)
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
return false
|
||||
}
|
||||
|
||||
if mode == encodeHost || mode == encodeZone {
|
||||
// §3.2.2 Host allows
|
||||
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
||||
// as part of reg-name.
|
||||
// We add : because we include :port as part of host.
|
||||
// We add [ ] because we include [ipv6]:port as part of host.
|
||||
// We add < > because they're the only characters left that
|
||||
// we could possibly allow, and Parse will reject them if we
|
||||
// escape them (because hosts can't use %-encoding for
|
||||
// ASCII bytes).
|
||||
switch c {
|
||||
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
|
||||
return false
|
||||
|
||||
case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
|
||||
// Different sections of the URL allow a few of
|
||||
// the reserved characters to appear unescaped.
|
||||
switch mode {
|
||||
case encodePath: // §3.3
|
||||
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||
// meaning to individual path segments. This package
|
||||
// only manipulates the path as a whole, so we allow those
|
||||
// last three as well. That leaves only ? to escape.
|
||||
return c == '?'
|
||||
|
||||
case encodePathSegment: // §3.3
|
||||
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||
// meaning to individual path segments.
|
||||
return c == '/' || c == ';' || c == ',' || c == '?'
|
||||
|
||||
case encodeUserPassword: // §3.2.1
|
||||
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
|
||||
// userinfo, so we must escape only '@', '/', and '?'.
|
||||
// The parsing of userinfo treats ':' as special so we must escape
|
||||
// that too.
|
||||
return c == '@' || c == '/' || c == '?' || c == ':'
|
||||
|
||||
case encodeQueryComponent: // §3.4
|
||||
// The RFC reserves (so we must escape) everything.
|
||||
return true
|
||||
|
||||
case encodeFragment: // §4.1
|
||||
// The RFC text is silent but the grammar allows
|
||||
// everything, so escape nothing.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if mode == encodeFragment {
|
||||
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
|
||||
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
|
||||
// need to be escaped. To minimize potential breakage, we apply two restrictions:
|
||||
// (1) we always escape sub-delims outside of the fragment, and (2) we always
|
||||
// escape single quote to avoid breaking callers that had previously assumed that
|
||||
// single quotes would be escaped. See issue #19917.
|
||||
switch c {
|
||||
case '!', '(', ')', '*':
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Everything else must be escaped.
|
||||
return true
|
||||
}
|
||||
|
||||
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f, or A-F).
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9',
		'a' <= c && c <= 'f',
		'A' <= c && c <= 'F':
		return true
	}
	return false
}
|
||||
|
|
@ -7,6 +7,9 @@
|
|||
// See RFC 3986. This package generally follows RFC 3986, except where
|
||||
// it deviates for compatibility reasons.
|
||||
// RFC 6874 followed for IPv6 zone literals.
|
||||
|
||||
//go:generate go run gen_encoding_table.go
|
||||
|
||||
package url
|
||||
|
||||
// When sending changes, first search old issues for history on decisions.
|
||||
|
|
@ -50,15 +53,7 @@ func (e *Error) Temporary() bool {
|
|||
const upperhex = "0123456789ABCDEF"
|
||||
|
||||
func ishex(c byte) bool {
|
||||
switch {
|
||||
case '0' <= c && c <= '9':
|
||||
return true
|
||||
case 'a' <= c && c <= 'f':
|
||||
return true
|
||||
case 'A' <= c && c <= 'F':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
return table[c]&hexChar != 0
|
||||
}
|
||||
|
||||
func unhex(c byte) byte {
|
||||
|
|
@ -74,18 +69,6 @@ func unhex(c byte) byte {
|
|||
}
|
||||
}
|
||||
|
||||
type encoding int
|
||||
|
||||
const (
|
||||
encodePath encoding = 1 + iota
|
||||
encodePathSegment
|
||||
encodeHost
|
||||
encodeZone
|
||||
encodeUserPassword
|
||||
encodeQueryComponent
|
||||
encodeFragment
|
||||
)
|
||||
|
||||
type EscapeError string
|
||||
|
||||
func (e EscapeError) Error() string {
|
||||
|
|
@ -98,86 +81,9 @@ func (e InvalidHostError) Error() string {
|
|||
return "invalid character " + strconv.Quote(string(e)) + " in host name"
|
||||
}
|
||||
|
||||
// Return true if the specified character should be escaped when
|
||||
// appearing in a URL string, according to RFC 3986.
|
||||
//
|
||||
// Please be informed that for now shouldEscape does not check all
|
||||
// reserved characters correctly. See golang.org/issue/5684.
|
||||
// See the reference implementation in gen_encoding_table.go.
|
||||
func shouldEscape(c byte, mode encoding) bool {
|
||||
// §2.3 Unreserved characters (alphanum)
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
return false
|
||||
}
|
||||
|
||||
if mode == encodeHost || mode == encodeZone {
|
||||
// §3.2.2 Host allows
|
||||
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
||||
// as part of reg-name.
|
||||
// We add : because we include :port as part of host.
|
||||
// We add [ ] because we include [ipv6]:port as part of host.
|
||||
// We add < > because they're the only characters left that
|
||||
// we could possibly allow, and Parse will reject them if we
|
||||
// escape them (because hosts can't use %-encoding for
|
||||
// ASCII bytes).
|
||||
switch c {
|
||||
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
|
||||
return false
|
||||
|
||||
case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
|
||||
// Different sections of the URL allow a few of
|
||||
// the reserved characters to appear unescaped.
|
||||
switch mode {
|
||||
case encodePath: // §3.3
|
||||
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||
// meaning to individual path segments. This package
|
||||
// only manipulates the path as a whole, so we allow those
|
||||
// last three as well. That leaves only ? to escape.
|
||||
return c == '?'
|
||||
|
||||
case encodePathSegment: // §3.3
|
||||
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||
// meaning to individual path segments.
|
||||
return c == '/' || c == ';' || c == ',' || c == '?'
|
||||
|
||||
case encodeUserPassword: // §3.2.1
|
||||
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
|
||||
// userinfo, so we must escape only '@', '/', and '?'.
|
||||
// The parsing of userinfo treats ':' as special so we must escape
|
||||
// that too.
|
||||
return c == '@' || c == '/' || c == '?' || c == ':'
|
||||
|
||||
case encodeQueryComponent: // §3.4
|
||||
// The RFC reserves (so we must escape) everything.
|
||||
return true
|
||||
|
||||
case encodeFragment: // §4.1
|
||||
// The RFC text is silent but the grammar allows
|
||||
// everything, so escape nothing.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if mode == encodeFragment {
|
||||
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
|
||||
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
|
||||
// need to be escaped. To minimize potential breakage, we apply two restrictions:
|
||||
// (1) we always escape sub-delims outside of the fragment, and (2) we always
|
||||
// escape single quote to avoid breaking callers that had previously assumed that
|
||||
// single quotes would be escaped. See issue #19917.
|
||||
switch c {
|
||||
case '!', '(', ')', '*':
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Everything else must be escaped.
|
||||
return true
|
||||
return table[c]&mode == 0
|
||||
}
|
||||
|
||||
// QueryUnescape does the inverse transformation of [QueryEscape],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue