encoding/xml: correctly escape newline, carriage return, and tab

The generated encodings are those from
http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping

The change to the decoder ensures that we turn &#xD; in the
input into \r, not \n.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/6747043
This commit is contained in:
Ian Lance Taylor 2012-10-18 13:40:45 -07:00
parent 2abaaefa72
commit 1e6d9f49da
2 changed files with 41 additions and 15 deletions

View file

@ -964,7 +964,16 @@ Input:
b0, b1 = 0, 0
continue Input
}
d.buf.WriteByte(b)
// We must rewrite unescaped \r and \r\n into \n.
if b == '\r' {
d.buf.WriteByte('\n')
} else if b1 == '\r' && b == '\n' {
// Skip \r\n--we already wrote \n.
} else {
d.buf.WriteByte(b)
}
b0, b1 = b1, b
}
data := d.buf.Bytes()
@ -985,20 +994,7 @@ Input:
}
}
// Must rewrite \r and \r\n into \n.
w := 0
for r := 0; r < len(data); r++ {
b := data[r]
if b == '\r' {
if r+1 < len(data) && data[r+1] == '\n' {
continue
}
b = '\n'
}
data[w] = b
w++
}
return data[0:w]
return data
}
// Decide whether the given rune is in the XML Character Range, per
@ -1689,6 +1685,9 @@ var (
esc_amp = []byte("&amp;")
esc_lt = []byte("&lt;")
esc_gt = []byte("&gt;")
esc_tab = []byte("&#x9;")
esc_nl = []byte("&#xA;")
esc_cr = []byte("&#xD;")
)
// Escape writes to w the properly escaped XML equivalent
@ -1708,6 +1707,12 @@ func Escape(w io.Writer, s []byte) {
esc = esc_lt
case '>':
esc = esc_gt
case '\t':
esc = esc_tab
case '\n':
esc = esc_nl
case '\r':
esc = esc_cr
default:
continue
}