mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 08:40:55 +00:00 
			
		
		
		
	 c48b77b1b5
			
		
	
	
		c48b77b1b5
		
	
	
	
	
		
			
			Surrogate halves are part of UTF-16 and should never appear in UTF-8.
(The rune that two combined halves represent in UTF-16 should
be encoded directly.)
Encoding: encode as RuneError.
Decoding: convert to RuneError, consume one byte.
This requires changing:
        package unicode/utf8
        runtime for range over string
Also added utf8.ValidRune and fixed bug in utf8.RuneLen.
Fixes #3927.
R=golang-dev, rsc, bsiegert
CC=golang-dev
https://golang.org/cl/6458099
		
	
			
		
			
				
	
	
		
			71 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			71 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // run
 | |
| 
 | |
| // Copyright 2009 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| // Test range over strings.
 | |
| 
 | |
| package main
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// main checks that ranging over a string yields the expected byte offsets
// and runes, cross-checking every step against utf8.DecodeRuneInString.
// It also checks that ranging over an empty string leaves the iteration
// variables untouched, and that a UTF-8-encoded surrogate half is reported
// as utf8.RuneError. Any mismatch is printed; if at least one check failed
// the program prints "BUG: stringrange" and exits with status 1.
func main() {
	// The bytes \xca and \xFE are not valid UTF-8 here, so each one is
	// expected to decode to 0xFFFD (see the matching entries in expect).
	s := "\000\123\x00\xca\xFE\u0123\ubabe\U0000babe\U0010FFFFx"
	expect := []rune{0, 0123, 0, 0xFFFD, 0xFFFD, 0x123, 0xbabe, 0xbabe, 0x10FFFF, 'x'}
	offset := 0 // byte offset predicted from the sizes DecodeRuneInString reported
	var i int
	var c rune
	ok := true
	cnum := 0 // index into expect of the rune being checked
	for i, c = range s {
		r, size := utf8.DecodeRuneInString(s[i:]) // check it another way
		if i != offset {
			fmt.Printf("unexpected offset %d not %d\n", i, offset)
			ok = false
		}
		if r != expect[cnum] {
			fmt.Printf("unexpected rune %d from DecodeRuneInString: %x not %x\n", i, r, expect[cnum])
			ok = false
		}
		if c != expect[cnum] {
			// Report c, the rune produced by range, not r from the decoder.
			fmt.Printf("unexpected rune %d from range: %x not %x\n", i, c, expect[cnum])
			ok = false
		}
		offset += size
		cnum++
	}
	// i and c are declared outside the loop, so i keeps the index of the
	// last rune; that rune is the one-byte 'x', hence len(s)-1.
	if i != len(s)-1 {
		fmt.Println("after loop i is", i, "not", len(s)-1)
		ok = false
	}

	// Ranging over an empty string must not assign to the iteration variables.
	i = 12345
	c = 23456
	for i, c = range "" {
	}
	if i != 12345 {
		fmt.Println("range empty string assigned to index:", i)
		ok = false
	}
	if c != 23456 {
		fmt.Println("range empty string assigned to value:", c)
		ok = false
	}

	// \xed\xa0\x80 is the UTF-8-style encoding of a surrogate half; every
	// rune seen here must be either 'a' or utf8.RuneError.
	for _, c := range "a\xed\xa0\x80a" {
		if c != 'a' && c != utf8.RuneError {
			fmt.Printf("surrogate UTF-8 does not error: %U\n", c)
			ok = false
		}
	}

	if !ok {
		fmt.Println("BUG: stringrange")
		os.Exit(1)
	}
}
 |