mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
encoding/xml: add InputOffset method to Decoder
Among other things, this allows users to match the decoded pieces with the original XML, which can be necessary for implementing standards like XML signatures.

Fixes #8484.

LGTM=bradfitz
R=bradfitz
CC=golang-codereviews
https://golang.org/cl/122960043
This commit is contained in:
parent
fef54b22f8
commit
b91aea5536
2 changed files with 38 additions and 4 deletions
|
|
@ -29,6 +29,7 @@ import (
|
||||||
type SyntaxError struct {
|
type SyntaxError struct {
|
||||||
Msg string
|
Msg string
|
||||||
Line int
|
Line int
|
||||||
|
Byte int64 // byte offset from start of stream
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *SyntaxError) Error() string {
|
func (e *SyntaxError) Error() string {
|
||||||
|
|
@ -196,6 +197,7 @@ type Decoder struct {
|
||||||
ns map[string]string
|
ns map[string]string
|
||||||
err error
|
err error
|
||||||
line int
|
line int
|
||||||
|
offset int64
|
||||||
unmarshalDepth int
|
unmarshalDepth int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -859,9 +861,17 @@ func (d *Decoder) getc() (b byte, ok bool) {
|
||||||
if b == '\n' {
|
if b == '\n' {
|
||||||
d.line++
|
d.line++
|
||||||
}
|
}
|
||||||
|
d.offset++
|
||||||
return b, true
|
return b, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InputOffset returns the input stream byte offset of the current decoder position.
|
||||||
|
// The offset gives the location of the end of the most recently returned token
|
||||||
|
// and the beginning of the next token.
|
||||||
|
func (d *Decoder) InputOffset() int64 {
|
||||||
|
return d.offset
|
||||||
|
}
|
||||||
|
|
||||||
// Return saved offset.
|
// Return saved offset.
|
||||||
// If we did ungetc (nextByte >= 0), have to back up one.
|
// If we did ungetc (nextByte >= 0), have to back up one.
|
||||||
func (d *Decoder) savedOffset() int {
|
func (d *Decoder) savedOffset() int {
|
||||||
|
|
@ -891,6 +901,7 @@ func (d *Decoder) ungetc(b byte) {
|
||||||
d.line--
|
d.line--
|
||||||
}
|
}
|
||||||
d.nextByte = int(b)
|
d.nextByte = int(b)
|
||||||
|
d.offset--
|
||||||
}
|
}
|
||||||
|
|
||||||
var entity = map[string]int{
|
var entity = map[string]int{
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,7 @@ var xmlInput = []string{
|
||||||
func TestRawToken(t *testing.T) {
|
func TestRawToken(t *testing.T) {
|
||||||
d := NewDecoder(strings.NewReader(testInput))
|
d := NewDecoder(strings.NewReader(testInput))
|
||||||
d.Entity = testEntity
|
d.Entity = testEntity
|
||||||
testRawToken(t, d, rawTokens)
|
testRawToken(t, d, testInput, rawTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
const nonStrictInput = `
|
const nonStrictInput = `
|
||||||
|
|
@ -225,7 +225,7 @@ var nonStrictTokens = []Token{
|
||||||
func TestNonStrictRawToken(t *testing.T) {
|
func TestNonStrictRawToken(t *testing.T) {
|
||||||
d := NewDecoder(strings.NewReader(nonStrictInput))
|
d := NewDecoder(strings.NewReader(nonStrictInput))
|
||||||
d.Strict = false
|
d.Strict = false
|
||||||
testRawToken(t, d, nonStrictTokens)
|
testRawToken(t, d, nonStrictInput, nonStrictTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
type downCaser struct {
|
type downCaser struct {
|
||||||
|
|
@ -254,7 +254,7 @@ func TestRawTokenAltEncoding(t *testing.T) {
|
||||||
}
|
}
|
||||||
return &downCaser{t, input.(io.ByteReader)}, nil
|
return &downCaser{t, input.(io.ByteReader)}, nil
|
||||||
}
|
}
|
||||||
testRawToken(t, d, rawTokensAltEncoding)
|
testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRawTokenAltEncodingNoConverter(t *testing.T) {
|
func TestRawTokenAltEncodingNoConverter(t *testing.T) {
|
||||||
|
|
@ -280,9 +280,12 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
|
func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
|
||||||
|
lastEnd := int64(0)
|
||||||
for i, want := range rawTokens {
|
for i, want := range rawTokens {
|
||||||
|
start := d.InputOffset()
|
||||||
have, err := d.RawToken()
|
have, err := d.RawToken()
|
||||||
|
end := d.InputOffset()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("token %d: unexpected error: %s", i, err)
|
t.Fatalf("token %d: unexpected error: %s", i, err)
|
||||||
}
|
}
|
||||||
|
|
@ -300,6 +303,26 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
|
||||||
}
|
}
|
||||||
t.Errorf("token %d = %s, want %s", i, shave, swant)
|
t.Errorf("token %d = %s, want %s", i, shave, swant)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that InputOffset returned actual token.
|
||||||
|
switch {
|
||||||
|
case start < lastEnd:
|
||||||
|
t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
|
||||||
|
case start >= end:
|
||||||
|
// Special case: EndElement can be synthesized.
|
||||||
|
if start == end && end == lastEnd {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
|
||||||
|
case end > int64(len(raw)):
|
||||||
|
t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
|
||||||
|
default:
|
||||||
|
text := raw[start:end]
|
||||||
|
if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
|
||||||
|
t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastEnd = end
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue