xml: disallow invalid Unicode code points

Fixes #1259.

R=rsc
CC=golang-dev
https://golang.org/cl/2967041
This commit is contained in:
Nigel Kerr 2010-12-09 14:51:01 -05:00 committed by Russ Cox
parent 3e2231e41f
commit 27f2d5ce8c
2 changed files with 69 additions and 0 deletions

View file

@ -16,6 +16,7 @@ package xml
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"strconv"
@ -871,6 +872,21 @@ Input:
data := p.buf.Bytes()
data = data[0 : len(data)-trunc]
// Inspect each rune for being a disallowed character.
buf := data
for len(buf) > 0 {
r, size := utf8.DecodeRune(buf)
if r == utf8.RuneError && size == 1 {
p.err = p.syntaxError("invalid UTF-8")
return nil
}
buf = buf[size:]
if !isInCharacterRange(r) {
p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r))
return nil
}
}
// Must rewrite \r and \r\n into \n.
w := 0
for r := 0; r < len(data); r++ {
@ -887,6 +903,18 @@ Input:
return data[0:w]
}
// Decide whether the given rune is in the XML Character Range, per
// the Char production of http://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters.
func isInCharacterRange(rune int) (inrange bool) {
return rune == 0x09 ||
rune == 0x0A ||
rune == 0x0D ||
rune >= 0x20 && rune <= 0xDF77 ||
rune >= 0xE000 && rune <= 0xFFFD ||
rune >= 0x10000 && rune <= 0x10FFFF
}
// Get name space name: name with a : stuck in the middle.
// The part before the : is the name space identifier.
func (p *Parser) nsname() (name Name, ok bool) {