mirror of
				https://github.com/python/cpython.git
				synced 2025-11-01 06:01:29 +00:00 
			
		
		
		
	Merged revisions 84655 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines

Issue #9804: ascii() now always represents unicode surrogate pairs as a single `\UXXXXXXXX`, regardless of whether the character is printable or not. Also, the "backslashreplace" error handler now joins surrogate pairs into a single character on UCS-2 builds.
........
This commit is contained in:
		
							parent
							
								
									8e0bb6a1e2
								
							
						
					
					
						commit
						c9a8df24cc
					
				
					 4 changed files with 72 additions and 17 deletions
				
			
		|  | @ -678,6 +678,13 @@ static Py_UNICODE hexdigits[] = { | |||
| 
 | ||||
| PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | ||||
| { | ||||
| #ifndef Py_UNICODE_WIDE | ||||
| #define IS_SURROGATE_PAIR(p, end) \ | ||||
|     (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \ | ||||
|      *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF) | ||||
| #else | ||||
| #define IS_SURROGATE_PAIR(p, end) 0 | ||||
| #endif | ||||
|     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | ||||
|         PyObject *restuple; | ||||
|         PyObject *object; | ||||
|  | @ -702,7 +709,12 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | |||
|             else | ||||
| #endif | ||||
|             if (*p >= 0x100) { | ||||
|                 ressize += 1+1+4; | ||||
|                 if (IS_SURROGATE_PAIR(p, startp+end)) { | ||||
|                     ressize += 1+1+8; | ||||
|                     ++p; | ||||
|                 } | ||||
|                 else | ||||
|                     ressize += 1+1+4; | ||||
|             } | ||||
|             else | ||||
|                 ressize += 1+1+2; | ||||
|  | @ -712,9 +724,12 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | |||
|             return NULL; | ||||
|         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); | ||||
|             p < startp+end; ++p) { | ||||
|             Py_UNICODE c = *p; | ||||
|             Py_UCS4 c = (Py_UCS4) *p; | ||||
|             *outp++ = '\\'; | ||||
| #ifdef Py_UNICODE_WIDE | ||||
|             if (IS_SURROGATE_PAIR(p, startp+end)) { | ||||
|                 c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000; | ||||
|                 ++p; | ||||
|             } | ||||
|             if (c >= 0x00010000) { | ||||
|                 *outp++ = 'U'; | ||||
|                 *outp++ = hexdigits[(c>>28)&0xf]; | ||||
|  | @ -724,9 +739,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | |||
|                 *outp++ = hexdigits[(c>>12)&0xf]; | ||||
|                 *outp++ = hexdigits[(c>>8)&0xf]; | ||||
|             } | ||||
|             else | ||||
| #endif | ||||
|             if (c >= 0x100) { | ||||
|             else if (c >= 0x100) { | ||||
|                 *outp++ = 'u'; | ||||
|                 *outp++ = hexdigits[(c>>12)&0xf]; | ||||
|                 *outp++ = hexdigits[(c>>8)&0xf]; | ||||
|  | @ -746,6 +759,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | |||
|         wrong_exception_type(exc); | ||||
|         return NULL; | ||||
|     } | ||||
| #undef IS_SURROGATE_PAIR | ||||
| } | ||||
| 
 | ||||
| /* This handler is declared static until someone demonstrates
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Antoine Pitrou
						Antoine Pitrou