mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	Marc-Andre's third try at this bulk patch seems to work (except that
his copy of test_contains.py seems to be broken -- the lines he deleted were already absent). Checkin messages: New Unicode support for int(), float(), complex() and long(). - new APIs PyInt_FromUnicode() and PyLong_FromUnicode() - added support for Unicode to PyFloat_FromString() - new encoding API PyUnicode_EncodeDecimal() which converts Unicode to a decimal char* string (used in the above new APIs) - shortcuts for calls like int(<int object>) and float(<float obj>) - tests for all of the above Unicode compares and contains checks: - comparing Unicode and non-string types now works; TypeErrors are masked, all other errors such as ValueError during Unicode coercion are passed through (note that PyUnicode_Compare does not implement the masking -- PyObject_Compare does this) - contains now works for non-string types too; TypeErrors are masked and 0 returned; all other errors are passed through Better testing support for the standard codecs. Misc minor enhancements, such as an alias dbcs for the mbcs codec. Changes: - PyLong_FromString() now applies the same error checks as does PyInt_FromString(): trailing garbage is reported as an error and no longer silently ignored. The only characters which may be trailing the digits are 'L' and 'l' -- these are still silently ignored. - string.ato?() now directly interface to int(), long() and float(). The error strings are now a little different, but the type still remains the same. These functions are now ready to get declared obsolete ;-) - PyNumber_Int() now also does a check for embedded NULL chars in the input string; PyNumber_Long() already did this (and still does) Followed by: Looks like I've gone a step too far there... (and test_contains.py seems to have a bug too). I've changed back to reporting all errors in PyUnicode_Contains() and added a few more test cases to test_contains.py (plus corrected the join() NameError).
This commit is contained in:
		
							parent
							
								
									457855a5f0
								
							
						
					
					
						commit
						9e896b37c7
					
				
					 17 changed files with 421 additions and 115 deletions
				
			
		|  | @ -72,6 +72,7 @@ PERFORMANCE OF THIS SOFTWARE. | ||||||
| 
 | 
 | ||||||
| #include "pydebug.h" | #include "pydebug.h" | ||||||
| 
 | 
 | ||||||
|  | #include "unicodeobject.h" | ||||||
| #include "intobject.h" | #include "intobject.h" | ||||||
| #include "longobject.h" | #include "longobject.h" | ||||||
| #include "floatobject.h" | #include "floatobject.h" | ||||||
|  | @ -92,7 +93,6 @@ PERFORMANCE OF THIS SOFTWARE. | ||||||
| #include "cobject.h" | #include "cobject.h" | ||||||
| #include "traceback.h" | #include "traceback.h" | ||||||
| #include "sliceobject.h" | #include "sliceobject.h" | ||||||
| #include "unicodeobject.h" |  | ||||||
| 
 | 
 | ||||||
| #include "codecs.h" | #include "codecs.h" | ||||||
| #include "pyerrors.h" | #include "pyerrors.h" | ||||||
|  |  | ||||||
|  | @ -60,6 +60,7 @@ extern DL_IMPORT(PyTypeObject) PyInt_Type; | ||||||
| #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type) | #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type) | ||||||
| 
 | 
 | ||||||
| extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int)); | extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int)); | ||||||
|  | extern DL_IMPORT(PyObject *) PyInt_FromUnicode Py_PROTO((Py_UNICODE*, int, int)); | ||||||
| extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long)); | extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long)); | ||||||
| extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *)); | extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *)); | ||||||
| extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void)); | extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void)); | ||||||
|  |  | ||||||
|  | @ -82,6 +82,7 @@ extern DL_IMPORT(unsigned LONG_LONG) PyLong_AsUnsignedLongLong Py_PROTO((PyObjec | ||||||
| #endif /* HAVE_LONG_LONG */ | #endif /* HAVE_LONG_LONG */ | ||||||
| 
 | 
 | ||||||
| DL_IMPORT(PyObject *) PyLong_FromString Py_PROTO((char *, char **, int)); | DL_IMPORT(PyObject *) PyLong_FromString Py_PROTO((char *, char **, int)); | ||||||
|  | DL_IMPORT(PyObject *) PyLong_FromUnicode Py_PROTO((Py_UNICODE*, int, int)); | ||||||
| 
 | 
 | ||||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -358,7 +358,7 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8( | ||||||
| 
 | 
 | ||||||
| /* --- UTF-16 Codecs ------------------------------------------------------ */ | /* --- UTF-16 Codecs ------------------------------------------------------ */ | ||||||
| 
 | 
 | ||||||
| /* Decodes length bytes from a UTF-16 encoded buffer string and return
 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns
 | ||||||
|    the corresponding Unicode object. |    the corresponding Unicode object. | ||||||
| 
 | 
 | ||||||
|    errors (if non-NULL) defines the error handling. It defaults |    errors (if non-NULL) defines the error handling. It defaults | ||||||
|  | @ -397,7 +397,7 @@ extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String( | ||||||
|     ); |     ); | ||||||
| 
 | 
 | ||||||
| /* Returns a Python string object holding the UTF-16 encoded value of
 | /* Returns a Python string object holding the UTF-16 encoded value of
 | ||||||
|    the Unicode data in s. |    the Unicode data. | ||||||
| 
 | 
 | ||||||
|    If byteorder is not 0, output is written according to the following |    If byteorder is not 0, output is written according to the following | ||||||
|    byte order: |    byte order: | ||||||
|  | @ -587,6 +587,37 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS( | ||||||
| 
 | 
 | ||||||
| #endif /* MS_WIN32 */ | #endif /* MS_WIN32 */ | ||||||
| 
 | 
 | ||||||
|  | /* --- Decimal Encoder ---------------------------------------------------- */ | ||||||
|  | 
 | ||||||
|  | /* Takes a Unicode string holding a decimal value and writes it into
 | ||||||
|  |    an output buffer using standard ASCII digit codes. | ||||||
|  | 
 | ||||||
|  |    The output buffer has to provide at least length+1 bytes of storage | ||||||
|  |    area. The output string is 0-terminated. | ||||||
|  | 
 | ||||||
|  |    The encoder converts whitespace to ' ', decimal characters to their | ||||||
|  |    corresponding ASCII digit and all other Latin-1 characters except | ||||||
|  |    \0 as-is. Characters outside this range (Unicode ordinals 1-255) | ||||||
|  |    are treated as errors. This includes embedded NULL bytes. | ||||||
|  | 
 | ||||||
|  |    Error handling is defined by the errors argument: | ||||||
|  | 
 | ||||||
|  |       NULL or "strict": raise a ValueError | ||||||
|  |       "ignore": ignore the wrong characters (these are not copied to the | ||||||
|  | 		output buffer) | ||||||
|  |       "replace": replaces illegal characters with '?' | ||||||
|  | 
 | ||||||
|  |    Returns 0 on success, -1 on failure. | ||||||
|  | 
 | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | extern DL_IMPORT(int) PyUnicode_EncodeDecimal( | ||||||
|  |     Py_UNICODE *s,		/* Unicode buffer */ | ||||||
|  |     int length,			/* Number of Py_UNICODE chars to encode */ | ||||||
|  |     char *output,		/* Output buffer; must have size >= length+1 */ | ||||||
|  |     const char *errors		/* error handling */ | ||||||
|  |     ); | ||||||
|  | 
 | ||||||
| /* --- Methods & Slots ----------------------------------------------------
 | /* --- Methods & Slots ----------------------------------------------------
 | ||||||
| 
 | 
 | ||||||
|    These are capable of handling Unicode objects and strings on input |    These are capable of handling Unicode objects and strings on input | ||||||
|  |  | ||||||
|  | @ -4,8 +4,8 @@ | ||||||
|     directory. |     directory. | ||||||
| 
 | 
 | ||||||
|     Codec modules must have names corresponding to standard lower-case |     Codec modules must have names corresponding to standard lower-case | ||||||
|     encoding names. Hyphens are automatically converted to |     encoding names with hyphens mapped to underscores, e.g. 'utf-8' is | ||||||
|     underscores, e.g. 'utf-8' is looked up as module utf_8. |     implemented by the module 'utf_8.py'. | ||||||
| 
 | 
 | ||||||
|     Each codec module must export the following interface: |     Each codec module must export the following interface: | ||||||
| 
 | 
 | ||||||
|  | @ -40,7 +40,7 @@ def search_function(encoding): | ||||||
|         return entry |         return entry | ||||||
| 
 | 
 | ||||||
|     # Import the module |     # Import the module | ||||||
|     modname = string.replace(encoding,'-','_') |     modname = string.replace(encoding, '-', '_') | ||||||
|     modname = aliases.aliases.get(modname,modname) |     modname = aliases.aliases.get(modname,modname) | ||||||
|     try: |     try: | ||||||
|         mod = __import__(modname,globals(),locals(),'*') |         mod = __import__(modname,globals(),locals(),'*') | ||||||
|  |  | ||||||
|  | @ -54,4 +54,7 @@ | ||||||
|     'macroman': 'mac_roman', |     'macroman': 'mac_roman', | ||||||
|     'macturkish': 'mac_turkish', |     'macturkish': 'mac_turkish', | ||||||
| 
 | 
 | ||||||
|  |     # MBCS | ||||||
|  |     'dbcs': 'mbcs', | ||||||
|  | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -196,14 +196,11 @@ def atof(s): | ||||||
|     Return the floating point number represented by the string s. |     Return the floating point number represented by the string s. | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|     if type(s) == _StringType: |  | ||||||
|     return _float(s) |     return _float(s) | ||||||
|     else: | 
 | ||||||
|         raise TypeError('argument 1: expected string, %s found' % |  | ||||||
|                         type(s).__name__) |  | ||||||
| 
 | 
 | ||||||
| # Convert string to integer | # Convert string to integer | ||||||
| def atoi(*args): | def atoi(s , base=10): | ||||||
|     """atoi(s [,base]) -> int |     """atoi(s [,base]) -> int | ||||||
| 
 | 
 | ||||||
|     Return the integer represented by the string s in the given |     Return the integer represented by the string s in the given | ||||||
|  | @ -214,23 +211,11 @@ def atoi(*args): | ||||||
|     accepted. |     accepted. | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|     try: |     return _int(s, base) | ||||||
|         s = args[0] |  | ||||||
|     except IndexError: |  | ||||||
|         raise TypeError('function requires at least 1 argument: %d given' % |  | ||||||
|                         len(args)) |  | ||||||
|     # Don't catch type error resulting from too many arguments to int().  The |  | ||||||
|     # error message isn't compatible but the error type is, and this function |  | ||||||
|     # is complicated enough already. |  | ||||||
|     if type(s) == _StringType: |  | ||||||
|         return _apply(_int, args) |  | ||||||
|     else: |  | ||||||
|         raise TypeError('argument 1: expected string, %s found' % |  | ||||||
|                         type(s).__name__) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Convert string to long integer | # Convert string to long integer | ||||||
| def atol(*args): | def atol(s, base=10): | ||||||
|     """atol(s [,base]) -> long |     """atol(s [,base]) -> long | ||||||
| 
 | 
 | ||||||
|     Return the long integer represented by the string s in the |     Return the long integer represented by the string s in the | ||||||
|  | @ -242,19 +227,7 @@ def atol(*args): | ||||||
|     unless base is 0. |     unless base is 0. | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|     try: |     return _long(s, base) | ||||||
|         s = args[0] |  | ||||||
|     except IndexError: |  | ||||||
|         raise TypeError('function requires at least 1 argument: %d given' % |  | ||||||
|                         len(args)) |  | ||||||
|     # Don't catch type error resulting from too many arguments to long().  The |  | ||||||
|     # error message isn't compatible but the error type is, and this function |  | ||||||
|     # is complicated enough already. |  | ||||||
|     if type(s) == _StringType: |  | ||||||
|         return _apply(_long, args) |  | ||||||
|     else: |  | ||||||
|         raise TypeError('argument 1: expected string, %s found' % |  | ||||||
|                         type(s).__name__) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Left-justify a string | # Left-justify a string | ||||||
|  |  | ||||||
|  | @ -3,3 +3,4 @@ Testing Unicode comparisons... done. | ||||||
| Testing Unicode contains method... done. | Testing Unicode contains method... done. | ||||||
| Testing Unicode formatting strings... done. | Testing Unicode formatting strings... done. | ||||||
| Testing builtin codecs... done. | Testing builtin codecs... done. | ||||||
|  | Testing standard mapping codecs... 0-127... 128-255... done. | ||||||
|  |  | ||||||
|  | @ -95,6 +95,7 @@ def __call__(self): pass | ||||||
| if complex(0j, 3.14) <> 3.14j: raise TestFailed, 'complex(0j, 3.14)' | if complex(0j, 3.14) <> 3.14j: raise TestFailed, 'complex(0j, 3.14)' | ||||||
| if complex(0.0, 3.14) <> 3.14j: raise TestFailed, 'complex(0.0, 3.14)' | if complex(0.0, 3.14) <> 3.14j: raise TestFailed, 'complex(0.0, 3.14)' | ||||||
| if complex("  3.14+J  ") <> 3.14+1j:  raise TestFailed, 'complex("  3.14+J  )"' | if complex("  3.14+J  ") <> 3.14+1j:  raise TestFailed, 'complex("  3.14+J  )"' | ||||||
|  | if complex(u"  3.14+J  ") <> 3.14+1j:  raise TestFailed, 'complex(u"  3.14+J  )"' | ||||||
| class Z: | class Z: | ||||||
|     def __complex__(self): return 3.14j |     def __complex__(self): return 3.14j | ||||||
| z = Z() | z = Z() | ||||||
|  | @ -208,6 +209,9 @@ def identity(item): | ||||||
| if float(314) <> 314.0: raise TestFailed, 'float(314)' | if float(314) <> 314.0: raise TestFailed, 'float(314)' | ||||||
| if float(314L) <> 314.0: raise TestFailed, 'float(314L)' | if float(314L) <> 314.0: raise TestFailed, 'float(314L)' | ||||||
| if float("  3.14  ") <> 3.14:  raise TestFailed, 'float("  3.14  ")' | if float("  3.14  ") <> 3.14:  raise TestFailed, 'float("  3.14  ")' | ||||||
|  | if float(u"  3.14  ") <> 3.14:  raise TestFailed, 'float(u"  3.14  ")' | ||||||
|  | if float(u"  \u0663.\u0661\u0664  ") <> 3.14: | ||||||
|  |     raise TestFailed, 'float(u"  \u0663.\u0661\u0664  ")' | ||||||
| 
 | 
 | ||||||
| print 'getattr' | print 'getattr' | ||||||
| import sys | import sys | ||||||
|  | @ -254,6 +258,9 @@ def f(): pass | ||||||
| if int(-3.9) <> -3: raise TestFailed, 'int(-3.9)' | if int(-3.9) <> -3: raise TestFailed, 'int(-3.9)' | ||||||
| if int(3.5) <> 3: raise TestFailed, 'int(3.5)' | if int(3.5) <> 3: raise TestFailed, 'int(3.5)' | ||||||
| if int(-3.5) <> -3: raise TestFailed, 'int(-3.5)' | if int(-3.5) <> -3: raise TestFailed, 'int(-3.5)' | ||||||
|  | # Different base: | ||||||
|  | if int("10",16) <> 16L: raise TestFailed, 'int("10",16)' | ||||||
|  | if int(u"10",16) <> 16L: raise TestFailed, 'int(u"10",16)' | ||||||
| # Test conversion from strings and various anomalies | # Test conversion from strings and various anomalies | ||||||
| L = [ | L = [ | ||||||
|         ('0', 0), |         ('0', 0), | ||||||
|  | @ -267,9 +274,28 @@ def f(): pass | ||||||
|         ('314 ', 314), |         ('314 ', 314), | ||||||
|         ('  \t\t  314  \t\t  ', 314), |         ('  \t\t  314  \t\t  ', 314), | ||||||
|         (`sys.maxint`, sys.maxint), |         (`sys.maxint`, sys.maxint), | ||||||
|  |         ('  1x', ValueError), | ||||||
|  |         ('  1  ', 1), | ||||||
|  |         ('  1\02  ', ValueError), | ||||||
|         ('', ValueError), |         ('', ValueError), | ||||||
|         (' ', ValueError), |         (' ', ValueError), | ||||||
|         ('  \t\t  ', ValueError), |         ('  \t\t  ', ValueError), | ||||||
|  |         (u'0', 0), | ||||||
|  |         (u'1', 1), | ||||||
|  |         (u'9', 9), | ||||||
|  |         (u'10', 10), | ||||||
|  |         (u'99', 99), | ||||||
|  |         (u'100', 100), | ||||||
|  |         (u'314', 314), | ||||||
|  |         (u' 314', 314), | ||||||
|  |         (u'\u0663\u0661\u0664 ', 314), | ||||||
|  |         (u'  \t\t  314  \t\t  ', 314), | ||||||
|  |         (u'  1x', ValueError), | ||||||
|  |         (u'  1  ', 1), | ||||||
|  |         (u'  1\02  ', ValueError), | ||||||
|  |         (u'', ValueError), | ||||||
|  |         (u' ', ValueError), | ||||||
|  |         (u'  \t\t  ', ValueError), | ||||||
| ] | ] | ||||||
| for s, v in L: | for s, v in L: | ||||||
|     for sign in "", "+", "-": |     for sign in "", "+", "-": | ||||||
|  | @ -349,10 +375,17 @@ class E: | ||||||
| if long(-3.9) <> -3L: raise TestFailed, 'long(-3.9)' | if long(-3.9) <> -3L: raise TestFailed, 'long(-3.9)' | ||||||
| if long(3.5) <> 3L: raise TestFailed, 'long(3.5)' | if long(3.5) <> 3L: raise TestFailed, 'long(3.5)' | ||||||
| if long(-3.5) <> -3L: raise TestFailed, 'long(-3.5)' | if long(-3.5) <> -3L: raise TestFailed, 'long(-3.5)' | ||||||
|  | if long("-3") <> -3L: raise TestFailed, 'long("-3")' | ||||||
|  | if long(u"-3") <> -3L: raise TestFailed, 'long(u"-3")' | ||||||
|  | # Different base: | ||||||
|  | if long("10",16) <> 16L: raise TestFailed, 'long("10",16)' | ||||||
|  | if long(u"10",16) <> 16L: raise TestFailed, 'long(u"10",16)' | ||||||
| # Check conversions from string (same test set as for int(), and then some) | # Check conversions from string (same test set as for int(), and then some) | ||||||
| LL = [ | LL = [ | ||||||
|         ('1' + '0'*20, 10L**20), |         ('1' + '0'*20, 10L**20), | ||||||
|         ('1' + '0'*100, 10L**100), |         ('1' + '0'*100, 10L**100), | ||||||
|  |         (u'1' + u'0'*20, 10L**20), | ||||||
|  |         (u'1' + u'0'*100, 10L**100), | ||||||
| ] | ] | ||||||
| for s, v in L + LL: | for s, v in L + LL: | ||||||
|     for sign in "", "+", "-": |     for sign in "", "+", "-": | ||||||
|  | @ -363,11 +396,11 @@ class E: | ||||||
|                 vv = -v |                 vv = -v | ||||||
|             try: |             try: | ||||||
|                 if long(ss) != long(vv): |                 if long(ss) != long(vv): | ||||||
|                     raise TestFailed, "int(%s)" % `ss` |                     raise TestFailed, "long(%s)" % `ss` | ||||||
|             except v: |             except v: | ||||||
|                 pass |                 pass | ||||||
|             except ValueError, e: |             except ValueError, e: | ||||||
|                 raise TestFailed, "int(%s) raised ValueError: %s" % (`ss`, e) |                 raise TestFailed, "long(%s) raised ValueError: %s" % (`ss`, e) | ||||||
| 
 | 
 | ||||||
| print 'map' | print 'map' | ||||||
| if map(None, 'hello world') <> ['h','e','l','l','o',' ','w','o','r','l','d']: | if map(None, 'hello world') <> ['h','e','l','l','o',' ','w','o','r','l','d']: | ||||||
|  |  | ||||||
|  | @ -221,15 +221,23 @@ def __init__(self): self.seq = [7, u'hello', 123L] | ||||||
| 
 | 
 | ||||||
| # Contains: | # Contains: | ||||||
| print 'Testing Unicode contains method...', | print 'Testing Unicode contains method...', | ||||||
| assert ('a' in 'abdb') == 1 | assert ('a' in u'abdb') == 1 | ||||||
| assert ('a' in 'bdab') == 1 | assert ('a' in u'bdab') == 1 | ||||||
| assert ('a' in 'bdaba') == 1 | assert ('a' in u'bdaba') == 1 | ||||||
| assert ('a' in 'bdba') == 1 | assert ('a' in u'bdba') == 1 | ||||||
| assert ('a' in u'bdba') == 1 | assert ('a' in u'bdba') == 1 | ||||||
| assert (u'a' in u'bdba') == 1 | assert (u'a' in u'bdba') == 1 | ||||||
| assert (u'a' in u'bdb') == 0 | assert (u'a' in u'bdb') == 0 | ||||||
| assert (u'a' in 'bdb') == 0 | assert (u'a' in 'bdb') == 0 | ||||||
| assert (u'a' in 'bdba') == 1 | assert (u'a' in 'bdba') == 1 | ||||||
|  | assert (u'a' in ('a',1,None)) == 1 | ||||||
|  | assert (u'a' in (1,None,'a')) == 1 | ||||||
|  | assert (u'a' in (1,None,u'a')) == 1 | ||||||
|  | assert ('a' in ('a',1,None)) == 1 | ||||||
|  | assert ('a' in (1,None,'a')) == 1 | ||||||
|  | assert ('a' in (1,None,u'a')) == 1 | ||||||
|  | assert ('a' in ('x',1,u'y')) == 0 | ||||||
|  | assert ('a' in ('x',1,None)) == 0 | ||||||
| print 'done.' | print 'done.' | ||||||
| 
 | 
 | ||||||
| # Formatting: | # Formatting: | ||||||
|  | @ -270,11 +278,88 @@ def __init__(self): self.seq = [7, u'hello', 123L] | ||||||
|     assert unicode(u.encode(encoding),encoding) == u |     assert unicode(u.encode(encoding),encoding) == u | ||||||
| 
 | 
 | ||||||
| u = u''.join(map(unichr, range(256))) | u = u''.join(map(unichr, range(256))) | ||||||
| for encoding in ('latin-1',): | for encoding in ( | ||||||
|  |     'latin-1', | ||||||
|  |     ): | ||||||
|  |     try: | ||||||
|         assert unicode(u.encode(encoding),encoding) == u |         assert unicode(u.encode(encoding),encoding) == u | ||||||
|  |     except AssertionError: | ||||||
|  |         print '*** codec "%s" failed round-trip' % encoding | ||||||
|  |     except ValueError,why: | ||||||
|  |         print '*** codec for "%s" failed: %s' % (encoding, why) | ||||||
| 
 | 
 | ||||||
| u = u''.join(map(unichr, range(128))) | u = u''.join(map(unichr, range(128))) | ||||||
| for encoding in ('ascii',): | for encoding in ( | ||||||
|  |     'ascii', | ||||||
|  |     ): | ||||||
|  |     try: | ||||||
|         assert unicode(u.encode(encoding),encoding) == u |         assert unicode(u.encode(encoding),encoding) == u | ||||||
|  |     except AssertionError: | ||||||
|  |         print '*** codec "%s" failed round-trip' % encoding | ||||||
|  |     except ValueError,why: | ||||||
|  |         print '*** codec for "%s" failed: %s' % (encoding, why) | ||||||
|  | 
 | ||||||
|  | print 'done.' | ||||||
|  | 
 | ||||||
|  | print 'Testing standard mapping codecs...', | ||||||
|  | 
 | ||||||
|  | print '0-127...', | ||||||
|  | s = ''.join(map(chr, range(128))) | ||||||
|  | for encoding in ( | ||||||
|  |     'cp037', 'cp1026', | ||||||
|  |     'cp437', 'cp500', 'cp737', 'cp775', 'cp850', | ||||||
|  |     'cp852', 'cp855', 'cp860', 'cp861', 'cp862', | ||||||
|  |     'cp863', 'cp865', 'cp866',  | ||||||
|  |     'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', | ||||||
|  |     'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', | ||||||
|  |     'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', | ||||||
|  |     'mac_cyrillic', 'mac_latin2', | ||||||
|  | 
 | ||||||
|  |     'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', | ||||||
|  |     'cp1256', 'cp1257', 'cp1258', | ||||||
|  |     'cp856', 'cp857', 'cp864', 'cp869', 'cp874', | ||||||
|  | 
 | ||||||
|  |     'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', | ||||||
|  |     'cp1006', 'cp875', 'iso8859_8', | ||||||
|  |      | ||||||
|  |     ### These have undefined mappings: | ||||||
|  |     #'cp424', | ||||||
|  |      | ||||||
|  |     ): | ||||||
|  |     try: | ||||||
|  |         assert unicode(s,encoding).encode(encoding) == s | ||||||
|  |     except AssertionError: | ||||||
|  |         print '*** codec "%s" failed round-trip' % encoding | ||||||
|  |     except ValueError,why: | ||||||
|  |         print '*** codec for "%s" failed: %s' % (encoding, why) | ||||||
|  | 
 | ||||||
|  | print '128-255...', | ||||||
|  | s = ''.join(map(chr, range(128,256))) | ||||||
|  | for encoding in ( | ||||||
|  |     'cp037', 'cp1026', | ||||||
|  |     'cp437', 'cp500', 'cp737', 'cp775', 'cp850', | ||||||
|  |     'cp852', 'cp855', 'cp860', 'cp861', 'cp862', | ||||||
|  |     'cp863', 'cp865', 'cp866',  | ||||||
|  |     'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', | ||||||
|  |     'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', | ||||||
|  |     'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', | ||||||
|  |     'mac_cyrillic', 'mac_latin2', | ||||||
|  |      | ||||||
|  |     ### These have undefined mappings: | ||||||
|  |     #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', | ||||||
|  |     #'cp1256', 'cp1257', 'cp1258', | ||||||
|  |     #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', | ||||||
|  |     #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', | ||||||
|  |      | ||||||
|  |     ### These fail the round-trip: | ||||||
|  |     #'cp1006', 'cp875', 'iso8859_8', | ||||||
|  |      | ||||||
|  |     ): | ||||||
|  |     try: | ||||||
|  |         assert unicode(s,encoding).encode(encoding) == s | ||||||
|  |     except AssertionError: | ||||||
|  |         print '*** codec "%s" failed round-trip' % encoding | ||||||
|  |     except ValueError,why: | ||||||
|  |         print '*** codec for "%s" failed: %s' % (encoding, why) | ||||||
| 
 | 
 | ||||||
| print 'done.' | print 'done.' | ||||||
|  |  | ||||||
|  | @ -726,6 +726,27 @@ PyNumber_Absolute(o) | ||||||
| 	return type_error("bad operand type for abs()"); | 	return type_error("bad operand type for abs()"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Add a check for embedded NULL-bytes in the argument. */ | ||||||
|  | static PyObject * | ||||||
|  | int_from_string(s, len) | ||||||
|  | 	const char *s; | ||||||
|  | 	int len; | ||||||
|  | { | ||||||
|  | 	char *end; | ||||||
|  | 	PyObject *x; | ||||||
|  | 
 | ||||||
|  | 	x = PyInt_FromString((char*)s, &end, 10); | ||||||
|  | 	if (x == NULL) | ||||||
|  | 		return NULL; | ||||||
|  | 	if (end != s + len) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				"null byte in argument for int()"); | ||||||
|  | 		Py_DECREF(x); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	return x; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| PyObject * | PyObject * | ||||||
| PyNumber_Int(o) | PyNumber_Int(o) | ||||||
| 	PyObject *o; | 	PyObject *o; | ||||||
|  | @ -736,69 +757,42 @@ PyNumber_Int(o) | ||||||
| 
 | 
 | ||||||
| 	if (o == NULL) | 	if (o == NULL) | ||||||
| 		return null_error(); | 		return null_error(); | ||||||
|  | 	if (PyInt_Check(o)) { | ||||||
|  | 		Py_INCREF(o); | ||||||
|  | 		return o; | ||||||
|  | 	} | ||||||
| 	if (PyString_Check(o)) | 	if (PyString_Check(o)) | ||||||
| 		return PyInt_FromString(PyString_AS_STRING(o), NULL, 10); | 		return int_from_string(PyString_AS_STRING(o),  | ||||||
|  | 				       PyString_GET_SIZE(o)); | ||||||
|  | 	if (PyUnicode_Check(o)) | ||||||
|  | 		return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o), | ||||||
|  | 					 PyUnicode_GET_SIZE(o), | ||||||
|  | 					 10); | ||||||
| 	m = o->ob_type->tp_as_number; | 	m = o->ob_type->tp_as_number; | ||||||
| 	if (m && m->nb_int) | 	if (m && m->nb_int) | ||||||
| 		return m->nb_int(o); | 		return m->nb_int(o); | ||||||
| 	if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) | 	if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) | ||||||
| 		return PyInt_FromString((char*)buffer, NULL, 10); | 		return int_from_string((char*)buffer, buffer_len); | ||||||
| 
 | 
 | ||||||
| 	return type_error("object can't be converted to int"); | 	return type_error("object can't be converted to int"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* There are two C API functions for converting a string to a long,
 | /* Add a check for embedded NULL-bytes in the argument. */ | ||||||
|  * PyNumber_Long() and PyLong_FromString().  Both are used in builtin_long,  |  | ||||||
|  * reachable from Python with the built-in function long(). |  | ||||||
|  * |  | ||||||
|  * The difference is this: PyNumber_Long will raise an exception when the |  | ||||||
|  * string cannot be converted to a long.  The most common situation is |  | ||||||
|  * where a float string is passed in; this raises a ValueError. |  | ||||||
|  * PyLong_FromString does not raise an exception; it silently truncates the  |  | ||||||
|  * float to an integer. |  | ||||||
|  * |  | ||||||
|  * You can see the different behavior from Python with the following: |  | ||||||
|  * |  | ||||||
|  * long('9.5') |  | ||||||
|  * => ValueError: invalid literal for long(): 9.5 |  | ||||||
|  * |  | ||||||
|  * long('9.5', 10) |  | ||||||
|  * => 9L |  | ||||||
|  * |  | ||||||
|  * The first example ends up calling PyNumber_Long(), while the second one |  | ||||||
|  * calls PyLong_FromString(). |  | ||||||
|  */ |  | ||||||
| static PyObject * | static PyObject * | ||||||
| long_from_string(s, len) | long_from_string(s, len) | ||||||
| 	const char *s; | 	const char *s; | ||||||
| 	int len; | 	int len; | ||||||
| { | { | ||||||
| 	const char *start; |  | ||||||
| 	char *end; | 	char *end; | ||||||
| 	PyObject *x; | 	PyObject *x; | ||||||
| 	char buffer[256]; /* For errors */ |  | ||||||
| 
 | 
 | ||||||
| 	start = s; |  | ||||||
| 	while (*s && isspace(Py_CHARMASK(*s))) |  | ||||||
| 		s++; |  | ||||||
| 	x = PyLong_FromString((char*)s, &end, 10); | 	x = PyLong_FromString((char*)s, &end, 10); | ||||||
| 	if (x == NULL) { | 	if (x == NULL) | ||||||
| 		if (PyErr_ExceptionMatches(PyExc_ValueError)) |  | ||||||
| 			goto bad; |  | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	if (end != s + len) { | ||||||
| 	while (*end && isspace(Py_CHARMASK(*end))) |  | ||||||
| 		end++; |  | ||||||
| 	if (*end != '\0') { |  | ||||||
|   bad: |  | ||||||
| 		sprintf(buffer, "invalid literal for long(): %.200s", s); |  | ||||||
| 		PyErr_SetString(PyExc_ValueError, buffer); |  | ||||||
| 		Py_XDECREF(x); |  | ||||||
| 		return NULL; |  | ||||||
| 	} |  | ||||||
| 	else if (end != start + len) { |  | ||||||
| 		PyErr_SetString(PyExc_ValueError, | 		PyErr_SetString(PyExc_ValueError, | ||||||
| 				"null byte in argument for long()"); | 				"null byte in argument for long()"); | ||||||
|  | 		Py_DECREF(x); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	return x; | 	return x; | ||||||
|  | @ -814,6 +808,10 @@ PyNumber_Long(o) | ||||||
| 
 | 
 | ||||||
| 	if (o == NULL) | 	if (o == NULL) | ||||||
| 		return null_error(); | 		return null_error(); | ||||||
|  | 	if (PyLong_Check(o)) { | ||||||
|  | 		Py_INCREF(o); | ||||||
|  | 		return o; | ||||||
|  | 	} | ||||||
| 	if (PyString_Check(o)) | 	if (PyString_Check(o)) | ||||||
| 		/* need to do extra error checking that PyLong_FromString() 
 | 		/* need to do extra error checking that PyLong_FromString() 
 | ||||||
| 		 * doesn't do.  In particular long('9.5') must raise an | 		 * doesn't do.  In particular long('9.5') must raise an | ||||||
|  | @ -821,6 +819,11 @@ PyNumber_Long(o) | ||||||
| 		 */ | 		 */ | ||||||
| 		return long_from_string(PyString_AS_STRING(o), | 		return long_from_string(PyString_AS_STRING(o), | ||||||
| 					PyString_GET_SIZE(o)); | 					PyString_GET_SIZE(o)); | ||||||
|  | 	if (PyUnicode_Check(o)) | ||||||
|  | 		/* The above check is done in PyLong_FromUnicode(). */ | ||||||
|  | 		return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o), | ||||||
|  | 					  PyUnicode_GET_SIZE(o), | ||||||
|  | 					  10); | ||||||
| 	m = o->ob_type->tp_as_number; | 	m = o->ob_type->tp_as_number; | ||||||
| 	if (m && m->nb_long) | 	if (m && m->nb_long) | ||||||
| 		return m->nb_long(o); | 		return m->nb_long(o); | ||||||
|  | @ -838,6 +841,10 @@ PyNumber_Float(o) | ||||||
| 
 | 
 | ||||||
| 	if (o == NULL) | 	if (o == NULL) | ||||||
| 		return null_error(); | 		return null_error(); | ||||||
|  | 	if (PyFloat_Check(o)) { | ||||||
|  | 		Py_INCREF(o); | ||||||
|  | 		return o; | ||||||
|  | 	} | ||||||
| 	if (!PyString_Check(o)) { | 	if (!PyString_Check(o)) { | ||||||
| 		m = o->ob_type->tp_as_number; | 		m = o->ob_type->tp_as_number; | ||||||
| 		if (m && m->nb_float) | 		if (m && m->nb_float) | ||||||
|  |  | ||||||
|  | @ -164,6 +164,22 @@ PyFloat_FromString(v, pend) | ||||||
| 		s = PyString_AS_STRING(v); | 		s = PyString_AS_STRING(v); | ||||||
| 		len = PyString_GET_SIZE(v); | 		len = PyString_GET_SIZE(v); | ||||||
| 	} | 	} | ||||||
|  | 	else if (PyUnicode_Check(v)) { | ||||||
|  | 		char s_buffer[256]; | ||||||
|  | 
 | ||||||
|  | 		if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) { | ||||||
|  | 			PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				 "float() literal too large to convert"); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),  | ||||||
|  | 					    PyUnicode_GET_SIZE(v), | ||||||
|  | 					    s_buffer,  | ||||||
|  | 					    NULL)) | ||||||
|  | 			return NULL; | ||||||
|  | 		s = s_buffer; | ||||||
|  | 		len = strlen(s); | ||||||
|  | 	} | ||||||
| 	else if (PyObject_AsCharBuffer(v, &s, &len)) { | 	else if (PyObject_AsCharBuffer(v, &s, &len)) { | ||||||
| 		PyErr_SetString(PyExc_TypeError, | 		PyErr_SetString(PyExc_TypeError, | ||||||
| 				"float() needs a string argument"); | 				"float() needs a string argument"); | ||||||
|  |  | ||||||
|  | @ -261,6 +261,24 @@ PyInt_FromString(s, pend, base) | ||||||
| 	return PyInt_FromLong(x); | 	return PyInt_FromLong(x); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | PyObject * | ||||||
|  | PyInt_FromUnicode(s, length, base) | ||||||
|  | 	Py_UNICODE *s; | ||||||
|  | 	int length; | ||||||
|  | 	int base; | ||||||
|  | { | ||||||
|  | 	char buffer[256]; | ||||||
|  | 	 | ||||||
|  | 	if (length >= sizeof(buffer)) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				"int() literal too large to convert"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (PyUnicode_EncodeDecimal(s, length, buffer, NULL)) | ||||||
|  | 		return NULL; | ||||||
|  | 	return PyInt_FromString(buffer, NULL, base); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Methods */ | /* Methods */ | ||||||
| 
 | 
 | ||||||
| /* ARGSUSED */ | /* ARGSUSED */ | ||||||
|  |  | ||||||
|  | @ -724,7 +724,7 @@ PyLong_FromString(str, pend, base) | ||||||
| 	int base; | 	int base; | ||||||
| { | { | ||||||
| 	int sign = 1; | 	int sign = 1; | ||||||
| 	char *start; | 	char *start, *orig_str = str; | ||||||
| 	PyLongObject *z; | 	PyLongObject *z; | ||||||
| 	 | 	 | ||||||
| 	if ((base != 0 && base < 2) || base > 36) { | 	if ((base != 0 && base < 2) || base > 36) { | ||||||
|  | @ -772,17 +772,44 @@ PyLong_FromString(str, pend, base) | ||||||
| 	} | 	} | ||||||
| 	if (z == NULL) | 	if (z == NULL) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	if (str == start) { | 	if (str == start) | ||||||
| 		PyErr_SetString(PyExc_ValueError, | 		goto onError; | ||||||
| 				"no digits in long int constant"); |  | ||||||
| 		Py_DECREF(z); |  | ||||||
| 		return NULL; |  | ||||||
| 	} |  | ||||||
| 	if (sign < 0 && z != NULL && z->ob_size != 0) | 	if (sign < 0 && z != NULL && z->ob_size != 0) | ||||||
| 		z->ob_size = -(z->ob_size); | 		z->ob_size = -(z->ob_size); | ||||||
|  | 	if (*str == 'L' || *str == 'l') | ||||||
|  | 		str++; | ||||||
|  | 	while (*str && isspace(Py_CHARMASK(*str))) | ||||||
|  | 		str++; | ||||||
|  | 	if (*str != '\0') | ||||||
|  | 		goto onError; | ||||||
| 	if (pend) | 	if (pend) | ||||||
| 		*pend = str; | 		*pend = str; | ||||||
| 	return (PyObject *) z; | 	return (PyObject *) z; | ||||||
|  | 
 | ||||||
|  |  onError: | ||||||
|  | 	PyErr_Format(PyExc_ValueError,  | ||||||
|  | 		     "invalid literal for long(): %.200s", orig_str); | ||||||
|  | 	Py_XDECREF(z); | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | PyObject * | ||||||
|  | PyLong_FromUnicode(u, length, base) | ||||||
|  | 	Py_UNICODE *u; | ||||||
|  | 	int length; | ||||||
|  | 	int base; | ||||||
|  | { | ||||||
|  | 	char buffer[256]; | ||||||
|  | 
 | ||||||
|  | 	if (length >= sizeof(buffer)) { | ||||||
|  | 		PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				"long() literal too large to convert"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
|  | 	if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	return PyLong_FromString(buffer, NULL, base); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static PyLongObject *x_divrem | static PyLongObject *x_divrem | ||||||
|  |  | ||||||
|  | @ -329,8 +329,14 @@ PyObject *PyUnicode_FromObject(register PyObject *obj) | ||||||
| 	s = PyString_AS_STRING(obj); | 	s = PyString_AS_STRING(obj); | ||||||
| 	len = PyString_GET_SIZE(obj); | 	len = PyString_GET_SIZE(obj); | ||||||
|     } |     } | ||||||
|     else if (PyObject_AsCharBuffer(obj, &s, &len)) |     else if (PyObject_AsCharBuffer(obj, &s, &len)) { | ||||||
|  | 	/* Overwrite the error message with something more useful in
 | ||||||
|  | 	   case of a TypeError. */ | ||||||
|  | 	if (PyErr_ExceptionMatches(PyExc_TypeError)) | ||||||
|  | 	    PyErr_SetString(PyExc_TypeError, | ||||||
|  | 			    "coercing to Unicode: need string or charbuffer"); | ||||||
| 	return NULL; | 	return NULL; | ||||||
|  |     } | ||||||
|     if (len == 0) { |     if (len == 0) { | ||||||
| 	Py_INCREF(unicode_empty); | 	Py_INCREF(unicode_empty); | ||||||
| 	return (PyObject *)unicode_empty; | 	return (PyObject *)unicode_empty; | ||||||
|  | @ -1923,6 +1929,60 @@ PyObject *PyUnicode_Translate(PyObject *str, | ||||||
|     return NULL; |     return NULL; | ||||||
| } | } | ||||||
|      |      | ||||||
|  | /* --- Decimal Encoder ---------------------------------------------------- */ | ||||||
|  | 
 | ||||||
|  | int PyUnicode_EncodeDecimal(Py_UNICODE *s, | ||||||
|  | 			    int length, | ||||||
|  | 			    char *output, | ||||||
|  | 			    const char *errors) | ||||||
|  | { | ||||||
|  |     Py_UNICODE *p, *end; | ||||||
|  | 
 | ||||||
|  |     if (output == NULL) { | ||||||
|  | 	PyErr_BadArgument(); | ||||||
|  | 	return -1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     p = s; | ||||||
|  |     end = s + length; | ||||||
|  |     while (p < end) { | ||||||
|  | 	register Py_UNICODE ch = *p++; | ||||||
|  | 	int decimal; | ||||||
|  | 	 | ||||||
|  | 	if (Py_UNICODE_ISSPACE(ch)) { | ||||||
|  | 	    *output++ = ' '; | ||||||
|  | 	    continue; | ||||||
|  | 	} | ||||||
|  | 	decimal = Py_UNICODE_TODECIMAL(ch); | ||||||
|  | 	if (decimal >= 0) { | ||||||
|  | 	    *output++ = '0' + decimal; | ||||||
|  | 	    continue; | ||||||
|  | 	} | ||||||
|  | 	if (0 < ch < 256) { | ||||||
|  | 	    *output++ = ch; | ||||||
|  | 	    continue; | ||||||
|  | 	} | ||||||
|  | 	/* All other characters are considered invalid */ | ||||||
|  | 	if (errors == NULL || strcmp(errors, "strict") == 0) { | ||||||
|  | 	    PyErr_SetString(PyExc_ValueError, | ||||||
|  | 			    "invalid decimal Unicode string"); | ||||||
|  | 	    goto onError; | ||||||
|  | 	} | ||||||
|  | 	else if (strcmp(errors, "ignore") == 0) | ||||||
|  | 	    continue; | ||||||
|  | 	else if (strcmp(errors, "replace") == 0) { | ||||||
|  | 	    *output++ = '?'; | ||||||
|  | 	    continue; | ||||||
|  | 	} | ||||||
|  |     } | ||||||
|  |     /* 0-terminate the output string */ | ||||||
|  |     *output++ = '\0'; | ||||||
|  |     return 0; | ||||||
|  | 
 | ||||||
|  |  onError: | ||||||
|  |     return -1; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* --- Helpers ------------------------------------------------------------ */ | /* --- Helpers ------------------------------------------------------------ */ | ||||||
| 
 | 
 | ||||||
| static  | static  | ||||||
|  | @ -2811,12 +2871,14 @@ int PyUnicode_Contains(PyObject *container, | ||||||
|     register Py_UNICODE ch; |     register Py_UNICODE ch; | ||||||
| 
 | 
 | ||||||
|     /* Coerce the two arguments */ |     /* Coerce the two arguments */ | ||||||
|     u = (PyUnicodeObject *)PyUnicode_FromObject(container); |  | ||||||
|     if (u == NULL) |  | ||||||
| 	goto onError; |  | ||||||
|     v = (PyUnicodeObject *)PyUnicode_FromObject(element); |     v = (PyUnicodeObject *)PyUnicode_FromObject(element); | ||||||
|     if (v == NULL) |     if (v == NULL) | ||||||
| 	goto onError; | 	goto onError; | ||||||
|  |     u = (PyUnicodeObject *)PyUnicode_FromObject(container); | ||||||
|  |     if (u == NULL) { | ||||||
|  | 	Py_DECREF(v); | ||||||
|  | 	goto onError; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     /* Check v in u */ |     /* Check v in u */ | ||||||
|     if (PyUnicode_GET_SIZE(v) != 1) { |     if (PyUnicode_GET_SIZE(v) != 1) { | ||||||
|  |  | ||||||
|  | @ -449,17 +449,44 @@ complex_from_string(v) | ||||||
| 	PyObject *v; | 	PyObject *v; | ||||||
| { | { | ||||||
| 	extern double strtod Py_PROTO((const char *, char **)); | 	extern double strtod Py_PROTO((const char *, char **)); | ||||||
| 	char *s, *start, *end; | 	const char *s, *start; | ||||||
|  | 	char *end; | ||||||
| 	double x=0.0, y=0.0, z; | 	double x=0.0, y=0.0, z; | ||||||
| 	int got_re=0, got_im=0, done=0; | 	int got_re=0, got_im=0, done=0; | ||||||
| 	int digit_or_dot; | 	int digit_or_dot; | ||||||
| 	int sw_error=0; | 	int sw_error=0; | ||||||
| 	int sign; | 	int sign; | ||||||
| 	char buffer[256]; /* For errors */ | 	char buffer[256]; /* For errors */ | ||||||
|  | 	int len; | ||||||
| 
 | 
 | ||||||
| 	start = s = PyString_AS_STRING(v); | 	if (PyString_Check(v)) { | ||||||
|  | 		s = PyString_AS_STRING(v); | ||||||
|  | 		len = PyString_GET_SIZE(v); | ||||||
|  | 	} | ||||||
|  | 	else if (PyUnicode_Check(v)) { | ||||||
|  | 		char s_buffer[256]; | ||||||
|  | 
 | ||||||
|  | 		if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) { | ||||||
|  | 			PyErr_SetString(PyExc_ValueError, | ||||||
|  | 				 "complex() literal too large to convert"); | ||||||
|  | 			return NULL; | ||||||
|  | 		} | ||||||
|  | 		if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),  | ||||||
|  | 					    PyUnicode_GET_SIZE(v), | ||||||
|  | 					    s_buffer,  | ||||||
|  | 					    NULL)) | ||||||
|  | 			return NULL; | ||||||
|  | 		s = s_buffer; | ||||||
|  | 		len = strlen(s); | ||||||
|  | 	} | ||||||
|  | 	else if (PyObject_AsCharBuffer(v, &s, &len)) { | ||||||
|  | 		PyErr_SetString(PyExc_TypeError, | ||||||
|  | 				"complex() needs a string first argument"); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	/* position on first nonblank */ | 	/* position on first nonblank */ | ||||||
|  | 	start = s; | ||||||
| 	while (*s && isspace(Py_CHARMASK(*s))) | 	while (*s && isspace(Py_CHARMASK(*s))) | ||||||
| 		s++; | 		s++; | ||||||
| 	if (s[0] == '\0') { | 	if (s[0] == '\0') { | ||||||
|  | @ -475,7 +502,7 @@ complex_from_string(v) | ||||||
| 		switch (*s) { | 		switch (*s) { | ||||||
| 
 | 
 | ||||||
| 		case '\0': | 		case '\0': | ||||||
| 			if (s-start != PyString_GET_SIZE(v)) { | 			if (s-start != len) { | ||||||
| 				PyErr_SetString( | 				PyErr_SetString( | ||||||
| 					PyExc_ValueError, | 					PyExc_ValueError, | ||||||
| 					"null byte in argument for complex()"); | 					"null byte in argument for complex()"); | ||||||
|  | @ -584,7 +611,7 @@ builtin_complex(self, args) | ||||||
| 	i = NULL; | 	i = NULL; | ||||||
| 	if (!PyArg_ParseTuple(args, "O|O:complex", &r, &i)) | 	if (!PyArg_ParseTuple(args, "O|O:complex", &r, &i)) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	if (PyString_Check(r)) | 	if (PyString_Check(r) || PyUnicode_Check(r)) | ||||||
| 		return complex_from_string(r); | 		return complex_from_string(r); | ||||||
| 	if ((nbr = r->ob_type->tp_as_number) == NULL || | 	if ((nbr = r->ob_type->tp_as_number) == NULL || | ||||||
| 	    nbr->nb_float == NULL || | 	    nbr->nb_float == NULL || | ||||||
|  | @ -1289,12 +1316,17 @@ builtin_int(self, args) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	if (base == -909) | 	if (base == -909) | ||||||
| 		return PyNumber_Int(v); | 		return PyNumber_Int(v); | ||||||
| 	else if (!PyString_Check(v)) { | 	else if (PyString_Check(v)) | ||||||
|  | 		return PyInt_FromString(PyString_AS_STRING(v), NULL, base); | ||||||
|  | 	else if (PyUnicode_Check(v)) | ||||||
|  | 		return PyInt_FromUnicode(PyUnicode_AS_UNICODE(v), | ||||||
|  | 					 PyUnicode_GET_SIZE(v), | ||||||
|  | 					 base); | ||||||
|  | 	else { | ||||||
| 		PyErr_SetString(PyExc_TypeError, | 		PyErr_SetString(PyExc_TypeError, | ||||||
| 				"can't convert non-string with explicit base"); | 				"can't convert non-string with explicit base"); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	return PyInt_FromString(PyString_AS_STRING(v), NULL, base); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static char int_doc[] = | static char int_doc[] = | ||||||
|  | @ -1319,12 +1351,17 @@ builtin_long(self, args) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	if (base == -909) | 	if (base == -909) | ||||||
| 		return PyNumber_Long(v); | 		return PyNumber_Long(v); | ||||||
| 	else if (!PyString_Check(v)) { | 	else if (PyString_Check(v)) | ||||||
|  | 		return PyLong_FromString(PyString_AS_STRING(v), NULL, base); | ||||||
|  | 	else if (PyUnicode_Check(v)) | ||||||
|  | 		return PyLong_FromUnicode(PyUnicode_AS_UNICODE(v), | ||||||
|  | 					  PyUnicode_GET_SIZE(v), | ||||||
|  | 					  base); | ||||||
|  | 	else { | ||||||
| 		PyErr_SetString(PyExc_TypeError, | 		PyErr_SetString(PyExc_TypeError, | ||||||
| 				"can't convert non-string with explicit base"); | 				"can't convert non-string with explicit base"); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	return PyLong_FromString(PyString_AS_STRING(v), NULL, base); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static char long_doc[] = | static char long_doc[] = | ||||||
|  |  | ||||||
|  | @ -84,8 +84,11 @@ int PyCodec_Register(PyObject *search_function) | ||||||
|     return -1; |     return -1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Convert a string to a normalized Python string: all characters are
 | ||||||
|  |    converted to lower case, spaces are replaced with hyphens. */ | ||||||
|  | 
 | ||||||
| static | static | ||||||
| PyObject *lowercasestring(const char *string) | PyObject *normalizestring(const char *string) | ||||||
| { | { | ||||||
|     register int i; |     register int i; | ||||||
|     int len = strlen(string); |     int len = strlen(string); | ||||||
|  | @ -96,8 +99,14 @@ PyObject *lowercasestring(const char *string) | ||||||
|     if (v == NULL) |     if (v == NULL) | ||||||
| 	return NULL; | 	return NULL; | ||||||
|     p = PyString_AS_STRING(v); |     p = PyString_AS_STRING(v); | ||||||
|     for (i = 0; i < len; i++) |     for (i = 0; i < len; i++) { | ||||||
| 	p[i] = tolower(string[i]); |         register char ch = string[i]; | ||||||
|  |         if (ch == ' ') | ||||||
|  |             ch = '-'; | ||||||
|  |         else | ||||||
|  |             ch = tolower(ch); | ||||||
|  | 	p[i] = ch; | ||||||
|  |     } | ||||||
|     return v; |     return v; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -132,8 +141,10 @@ PyObject *_PyCodec_Lookup(const char *encoding) | ||||||
| 	    goto onError; | 	    goto onError; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /* Convert the encoding to a lower-cased Python string */ |     /* Convert the encoding to a normalized Python string: all
 | ||||||
|     v = lowercasestring(encoding); |        characters are converted to lower case and spaces are | ||||||
|  |        replaced with hyphens. */ | ||||||
|  |     v = normalizestring(encoding); | ||||||
|     if (v == NULL) |     if (v == NULL) | ||||||
| 	goto onError; | 	goto onError; | ||||||
|     PyString_InternInPlace(&v); |     PyString_InternInPlace(&v); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Guido van Rossum
						Guido van Rossum