mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 b713ec2531
			
		
	
	
		b713ec2531
		
	
	
	
	
		
			
			In rare cases of strings specifying true values near sys.maxint, and
			oddball bases (not decimal or a power of 2), int(string, base) could
			deliver insane answers. This repairs all such problems, and also speeds
			string->int significantly. On my box, here are % speedups for decimal
			strings of various lengths:

			length   speedup
			------   -------
			     1    12.4%
			     2    15.7%
			     3    20.6%
			     4    28.1%
			     5    33.2%
			     6    37.5%
			     7    41.9%
			     8    46.3%
			     9    51.2%
			    10    19.5%
			    11    19.9%
			    12    23.9%
			    13    23.7%
			    14    23.3%
			    15    24.9%
			    16    25.3%
			    17    28.3%
			    18    27.9%
			    19    35.7%

			Note that the difference between 9 and 10 is the difference between
			short and long Python ints on a 32-bit box. The patch doesn't actually
			do anything to speed conversion to long: the speedup is due to detecting
			"unsigned long" overflow more quickly.

			This is a bugfix candidate, but it's a non-trivial patch and it would be
			painful to separate the "bug fix" from the "speed up" parts.
		
			
				
	
	
		
			241 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			241 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
 | |
| #include "Python.h"
 | |
| 
 | |
| #if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE)
 | |
| #define _SGI_MP_SOURCE
 | |
| #endif
 | |
| 
 | |
/* Py_CHARMASK(c) turns a possibly signed character into a nonnegative int,
 * so the result can be used as a table index or handed to the <ctype.h>
 * classification functions.
 */
/* XXX The 0xff mask assumes characters are 8 bits wide */
#ifndef __CHAR_UNSIGNED__
#define Py_CHARMASK(c)		((c) & 0xff)
#else
#define Py_CHARMASK(c)		(c)
#endif
 | |
| 
 | |
| /* strtol and strtoul, renamed to avoid conflicts */
 | |
| 
 | |
| 
 | |
| #include <ctype.h>
 | |
| #ifndef DONT_HAVE_ERRNO_H
 | |
| #include <errno.h>
 | |
| #endif
 | |
| 
 | |
/* Static overflow check values for bases 2 through 36.
 * smallmax[base] is the largest unsigned long i such that
 * i * base doesn't overflow unsigned long.
 * The table is indexed directly by the base, so entries 0 and 1 are
 * placeholders.  It is never written to, hence const.
 */
static const unsigned long smallmax[] = {
	0, /* bases 0 and 1 are invalid */
	0,
	ULONG_MAX / 2,
	ULONG_MAX / 3,
	ULONG_MAX / 4,
	ULONG_MAX / 5,
	ULONG_MAX / 6,
	ULONG_MAX / 7,
	ULONG_MAX / 8,
	ULONG_MAX / 9,
	ULONG_MAX / 10,
	ULONG_MAX / 11,
	ULONG_MAX / 12,
	ULONG_MAX / 13,
	ULONG_MAX / 14,
	ULONG_MAX / 15,
	ULONG_MAX / 16,
	ULONG_MAX / 17,
	ULONG_MAX / 18,
	ULONG_MAX / 19,
	ULONG_MAX / 20,
	ULONG_MAX / 21,
	ULONG_MAX / 22,
	ULONG_MAX / 23,
	ULONG_MAX / 24,
	ULONG_MAX / 25,
	ULONG_MAX / 26,
	ULONG_MAX / 27,
	ULONG_MAX / 28,
	ULONG_MAX / 29,
	ULONG_MAX / 30,
	ULONG_MAX / 31,
	ULONG_MAX / 32,
	ULONG_MAX / 33,
	ULONG_MAX / 34,
	ULONG_MAX / 35,
	ULONG_MAX / 36,
};
 | |
| 
 | |
/* Maximum number of digits that can't ever overflow an unsigned long, for
 * bases 2 through 36.  digitlimit[base] is floor(log(2**N, base)) where N is
 * the width of unsigned long in bits; entries 0 and 1 are placeholders.
 *
 * The values must be exact for the platform's unsigned long, not merely a
 * safe lower bound: PyOS_strtoul treats a number with more than
 * digitlimit[base] + 1 significant digits as a guaranteed overflow, so a
 * too-small limit makes it reject values that actually fit.  The table is
 * therefore selected by ULONG_MAX.
 */
#if ULONG_MAX == 0xffffffffUL
/* [int(math.floor(math.log(2**32, i))) for i in range(2, 37)] */
static const int digitlimit[] = {
	0,  0, 32, 20, 16, 13, 12, 11, 10, 10,  /*  0 -  9 */
	9,  9,  8,  8,  8,  8,  8,  7,  7,  7,  /* 10 - 19 */
	7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  /* 20 - 29 */
	6,  6,  6,  6,  6,  6,  6};             /* 30 - 36 */
#elif ULONG_MAX == 0xffffffffffffffffUL
/* [int(math.floor(math.log(2**64, i))) for i in range(2, 37)] */
static const int digitlimit[] = {
	 0,  0, 64, 40, 32, 27, 24, 22, 21, 20,  /*  0 -  9 */
	19, 18, 17, 17, 16, 16, 16, 15, 15, 15,  /* 10 - 19 */
	14, 14, 14, 14, 13, 13, 13, 13, 13, 13,  /* 20 - 29 */
	13, 12, 12, 12, 12, 12, 12};             /* 30 - 36 */
#else
#error "Need a digitlimit table for this width of unsigned long"
#endif
 | |
| 
 | |
/* char-to-digit conversion for bases 2-36; all non-digits are 37.
 * Indexed by a character value in 0..255: '0'-'9' map to 0-9, and both
 * 'A'-'Z' and 'a'-'z' map to 10-35.  37 is larger than any valid base, so
 * "digitlookup[ch] < base" is false for every non-digit.
 * The table holds exactly 256 entries (one per 8-bit character value) and
 * is never written to.
 */
static const int digitlookup[256] = {
	/* 0x00 - 0x2f: control characters and punctuation */
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	/* 0x30 - 0x3f: '0' - '9' */
	0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  37, 37, 37, 37, 37, 37,
	/* 0x40 - 0x5f: 'A' - 'Z' */
	37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
	/* 0x60 - 0x7f: 'a' - 'z' */
	37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
	/* 0x80 - 0xff: never digits */
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
	37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37
};
 | |
| 
 | |
| /*
 | |
| **	strtoul
 | |
| **		This is a general purpose routine for converting
 | |
| **		an ascii string to an integer in an arbitrary base.
 | |
| **		Leading white space is ignored.  If 'base' is zero
 | |
| **		it looks for a leading 0, 0x or 0X to tell which
 | |
| **		base.  If these are absent it defaults to 10.
 | |
| **		Base must be 0 or between 2 and 36 (inclusive).
 | |
| **		If 'ptr' is non-NULL it will contain a pointer to
 | |
| **		the end of the scan.
 | |
| **		Errors due to bad pointers will probably result in
 | |
| **		exceptions - we don't check for them.
 | |
| */
 | |
| unsigned long
 | |
| PyOS_strtoul(register char *str, char **ptr, int base)
 | |
| {
 | |
| 	register unsigned long result = 0; /* return value of the function */
 | |
| 	register int c;	 	/* current input character */
 | |
| 	register int ovlimit; 	/* required digits to overflow */
 | |
| 
 | |
| 	/* skip leading white space */
 | |
| 	while (*str && isspace(Py_CHARMASK(*str)))
 | |
| 		++str;
 | |
| 
 | |
| 	/* check for leading 0 or 0x for auto-base or base 16 */
 | |
| 	switch (base) {
 | |
| 		case 0:		/* look for leading 0, 0x or 0X */
 | |
| 			if (*str == '0') {
 | |
| 				++str;
 | |
| 				if (*str == 'x' || *str == 'X') {
 | |
| 					++str;
 | |
| 					base = 16;
 | |
| 				}
 | |
| 				else
 | |
| 					base = 8;
 | |
| 			}
 | |
| 			else
 | |
| 				base = 10;
 | |
| 			break;
 | |
| 
 | |
| 		case 16:	/* skip leading 0x or 0X */
 | |
| 			if (*str == '0') {
 | |
| 				++str;
 | |
| 				if (*str == 'x' || *str == 'X')
 | |
| 					++str;
 | |
| 			}
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	/* catch silly bases */
 | |
| 	if (base < 2 || base > 36) {
 | |
| 		if (ptr)
 | |
| 			*ptr = str;
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	/* skip leading zeroes */
 | |
| 	while (*str == '0')
 | |
| 		++str;
 | |
| 
 | |
| 	/* base is guaranteed to be in [2, 36] at this point */
 | |
| 	ovlimit = digitlimit[base];
 | |
| 
 | |
| 	/* do the conversion until non-digit character encountered */
 | |
| 	while ((c = digitlookup[Py_CHARMASK(*str)]) < base) {
 | |
| 		if (ovlimit > 0) /* no overflow check required */
 | |
| 			result = result * base + c;
 | |
| 		else { /* requires overflow check */
 | |
| 			register unsigned long temp_result;
 | |
| 
 | |
| 			if (ovlimit < 0) /* guaranteed overflow */
 | |
| 				goto overflowed;
 | |
| 
 | |
| 			/* there could be an overflow */
 | |
| 			/* check overflow just from shifting */
 | |
| 			if (result > smallmax[base])
 | |
| 				goto overflowed;
 | |
| 
 | |
| 			result *= base;
 | |
| 
 | |
| 			/* check overflow from the digit's value */
 | |
| 			temp_result = result + c;
 | |
| 			if (temp_result < result)
 | |
| 				goto overflowed;
 | |
| 
 | |
| 			result = temp_result;
 | |
| 		}
 | |
| 
 | |
| 		++str;
 | |
| 		--ovlimit;
 | |
| 	}
 | |
| 
 | |
| 	/* set pointer to point to the last character scanned */
 | |
| 	if (ptr)
 | |
| 		*ptr = str;
 | |
| 
 | |
| 	return result;
 | |
| 
 | |
| overflowed:
 | |
| 	if (ptr) {
 | |
| 		/* spool through remaining digit characters */
 | |
| 		while (digitlookup[Py_CHARMASK(*str)] < base)
 | |
| 			++str;
 | |
| 		*ptr = str;
 | |
| 	}
 | |
| 	errno = ERANGE;
 | |
| 	return (unsigned long)-1;
 | |
| }
 | |
| 
 | |
/*
**	strtol
**		Signed counterpart of PyOS_strtoul.  Leading white space and
**		an optional '+' or '-' sign are consumed here; the rest of
**		the string is converted by PyOS_strtoul with the same 'ptr'
**		and 'base' arguments.
**		If the magnitude doesn't fit in a long, errno is set to
**		ERANGE and the result is LONG_MAX, negated when the string
**		had a '-' sign.  The most negative long (LONG_MIN) is
**		accepted and is not an overflow.
*/
long
PyOS_strtol(char *str, char **ptr, int base)
{
	/* |LONG_MIN|, computed in unsigned arithmetic because -LONG_MIN
	   isn't representable as a long */
	const unsigned long abs_long_min = 0UL - (unsigned long)LONG_MIN;
	unsigned long magnitude;
	long result;
	char sign;

	while (*str && isspace(Py_CHARMASK(*str)))
		str++;

	sign = *str;
	if (sign == '+' || sign == '-')
		str++;

	magnitude = PyOS_strtoul(str, ptr, base);

	if (magnitude <= (unsigned long)LONG_MAX) {
		/* fits: conversion and negation can't overflow */
		result = (long)magnitude;
		if (sign == '-')
			result = -result;
	}
	else if (sign == '-' && magnitude == abs_long_min) {
		/* the largest negative integer has no positive counterpart */
		result = LONG_MIN;
	}
	else {
		/* Signal overflow, including the (unsigned long)-1 that
		   PyOS_strtoul returns for its own overflow.  Clamp to the
		   real LONG_MAX rather than a hard-coded 32-bit value. */
		errno = ERANGE;
		result = LONG_MAX;
		if (sign == '-')
			result = -result;
	}

	return result;
}
 |