mirror of
				https://github.com/golang/go.git
				synced 2025-11-04 02:30:57 +00:00 
			
		
		
		
	Surrogate halves are part of UTF-16 and should never appear in UTF-8.
(The rune that two combined halves represent in UTF-16 should
be encoded directly.)
Encoding: encode as RuneError.
Decoding: convert to RuneError, consume one byte.
This requires changing:
        package unicode/utf8
        runtime for range over string
Also added utf8.ValidRune and fixed bug in utf8.RuneLen.
Fixes #3927.
R=golang-dev, rsc, bsiegert
CC=golang-dev
https://golang.org/cl/6458099
		
	
			
		
			
				
	
	
		
			71 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			71 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// run
 | 
						|
 | 
						|
// Copyright 2009 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
// Test range over strings.
 | 
						|
 | 
						|
package main
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"os"
 | 
						|
	"unicode/utf8"
 | 
						|
)
 | 
						|
 | 
						|
// main verifies the behavior of range over strings:
//
//   - each iteration yields the same rune that utf8.DecodeRuneInString
//     reports at that byte offset, and the index advances by the decoded size;
//   - invalid UTF-8 bytes (\xca, \xFE) each decode to U+FFFD;
//   - ranging over an empty string leaves the iteration variables untouched;
//   - a UTF-8-encoded surrogate half decodes to utf8.RuneError.
//
// Every failure is reported on standard output; if any check failed the
// program prints "BUG: stringrange" and exits with status 1.
func main() {
	s := "\000\123\x00\xca\xFE\u0123\ubabe\U0000babe\U0010FFFFx"
	expect := []rune{0, 0123, 0, 0xFFFD, 0xFFFD, 0x123, 0xbabe, 0xbabe, 0x10FFFF, 'x'}
	offset := 0
	// i and c are declared outside the loop so that their final values can be
	// inspected after it, and reused by the empty-string check below.
	var i int
	var c rune
	ok := true
	cnum := 0
	for i, c = range s {
		r, size := utf8.DecodeRuneInString(s[i:]) // check it another way
		if i != offset {
			fmt.Printf("unexpected offset %d not %d\n", i, offset)
			ok = false
		}
		if r != expect[cnum] {
			fmt.Printf("unexpected rune %d from DecodeRuneInString: %x not %x\n", i, r, expect[cnum])
			ok = false
		}
		if c != expect[cnum] {
			// Report c, the rune produced by range, not r: the two can differ
			// when only the range decoding is wrong.
			fmt.Printf("unexpected rune %d from range: %x not %x\n", i, c, expect[cnum])
			ok = false
		}
		offset += size
		cnum++
	}
	// The final rune 'x' occupies a single byte, so the last index assigned
	// by the loop must be the last byte of s.
	if i != len(s)-1 {
		fmt.Println("after loop i is", i, "not", len(s)-1)
		ok = false
	}

	// Ranging over an empty string must not assign to the iteration variables.
	i = 12345
	c = 23456
	for i, c = range "" {
	}
	if i != 12345 {
		fmt.Println("range empty string assigned to index:", i)
		ok = false
	}
	if c != 23456 {
		fmt.Println("range empty string assigned to value:", c)
		ok = false
	}

	// \xed\xa0\x80 is the three-byte encoding of a surrogate half; it is not
	// valid UTF-8, so every rune seen here must be 'a' or utf8.RuneError.
	for _, c := range "a\xed\xa0\x80a" {
		if c != 'a' && c != utf8.RuneError {
			fmt.Printf("surrogate UTF-8 does not error: %U\n", c)
			ok = false
		}
	}

	if !ok {
		fmt.Println("BUG: stringrange")
		os.Exit(1)
	}
}
 |