encoding/xml: expand allowed entity names

Previously, multi-byte characters were not allowed. Also certain single-byte
characters, such as '-', were disallowed.
Fixes #3813.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6641052
This commit is contained in:
Patrick Smith 2012-10-21 20:33:24 -04:00 committed by Russ Cox
parent 5d05c7800e
commit 2e67dd861d
2 changed files with 154 additions and 89 deletions

View file

@ -19,6 +19,7 @@ const testInput = `
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
"\r\n\t" + ` >
<hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
<query>&何; &is-it;</query>
<goodbye />
<outer foo:attr="value" xmlns:tag="ns4">
<inner/>
@ -28,6 +29,8 @@ const testInput = `
</tag:name>
</body><!-- missing final newline -->`
// testEntity maps custom entity names to their replacement text. TestRawToken
// and TestToken assign it to the decoder's Entity field before reading
// testInput.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
var rawTokens = []Token{
CharData("\n"),
ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
@ -41,6 +44,10 @@ var rawTokens = []Token{
CharData("World <>'\" 白鵬翔"),
EndElement{Name{"", "hello"}},
CharData("\n "),
StartElement{Name{"", "query"}, []Attr{}},
CharData("What is it?"),
EndElement{Name{"", "query"}},
CharData("\n "),
StartElement{Name{"", "goodbye"}, []Attr{}},
EndElement{Name{"", "goodbye"}},
CharData("\n "),
@ -74,6 +81,10 @@ var cookedTokens = []Token{
CharData("World <>'\" 白鵬翔"),
EndElement{Name{"ns2", "hello"}},
CharData("\n "),
StartElement{Name{"ns2", "query"}, []Attr{}},
CharData("What is it?"),
EndElement{Name{"ns2", "query"}},
CharData("\n "),
StartElement{Name{"ns2", "goodbye"}, []Attr{}},
EndElement{Name{"ns2", "goodbye"}},
CharData("\n "),
@ -156,6 +167,7 @@ var xmlInput = []string{
// TestRawToken builds a decoder over testInput, installs testEntity as its
// Entity map, and passes the decoder to testRawToken along with the
// rawTokens list (presumably the expected token sequence; testRawToken is
// defined outside this view).
func TestRawToken(t *testing.T) {
d := NewDecoder(strings.NewReader(testInput))
// Set the custom entity map before the decoder is handed off for reading.
d.Entity = testEntity
testRawToken(t, d, rawTokens)
}
@ -164,8 +176,14 @@ const nonStrictInput = `
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
// nonStringEntity maps the empty name and "0a" to "oops!".
// NOTE(review): presumably installed on the decoder in TestNonStrictRawToken
// (its body is not visible here) so that a wrongly accepted "&;" or "&0a;"
// would surface as "oops!" instead of the verbatim text expected in
// nonStrictTokens — confirm against the test body.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
var nonStrictTokens = []Token{
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
@ -184,6 +202,22 @@ var nonStrictTokens = []Token{
CharData("&#zzz;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&なまえ3;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&lt-gt;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&0a;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
}
func TestNonStrictRawToken(t *testing.T) {
@ -317,6 +351,7 @@ func TestNestedDirectives(t *testing.T) {
func TestToken(t *testing.T) {
d := NewDecoder(strings.NewReader(testInput))
d.Entity = testEntity
for i, want := range cookedTokens {
have, err := d.Token()