mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	Backport of several functions from Python 3.0 to 2.6 including PyUnicode_FromString, PyUnicode_Format and PyLong_From/AsSsize_t. The functions are partly required for the backport of the bytearray type and _fileio module. They should also make it easier to port C to 3.0.
First chapter of the Python 3.0 io framework backport: _fileio. The next step depends on a working bytearray type, which itself depends on a backport of the new buffer API.
This commit is contained in:
		
							parent
							
								
									5f95a79b2b
								
							
						
					
					
						commit
						7f39c9fcbb
					
				
					 8 changed files with 1599 additions and 173 deletions
				
			
		|  | @ -397,6 +397,57 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, | |||
|     return (PyObject *)unicode; | ||||
| } | ||||
| 
 | ||||
| PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) | ||||
| { | ||||
|     PyUnicodeObject *unicode; | ||||
|     /* If the Unicode data is known at construction time, we can apply
 | ||||
|        some optimizations which share commonly used objects. | ||||
|        Also, this means the input must be UTF-8, so fall back to the | ||||
|        UTF-8 decoder at the end. */ | ||||
|     if (u != NULL) { | ||||
| 
 | ||||
| 	/* Optimization for empty strings */ | ||||
| 	if (size == 0 && unicode_empty != NULL) { | ||||
| 	    Py_INCREF(unicode_empty); | ||||
| 	    return (PyObject *)unicode_empty; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Single characters are shared when using this constructor.
 | ||||
|            Restrict to ASCII, since the input must be UTF-8. */ | ||||
| 	if (size == 1 && Py_CHARMASK(*u) < 128) { | ||||
| 	    unicode = unicode_latin1[Py_CHARMASK(*u)]; | ||||
| 	    if (!unicode) { | ||||
| 		unicode = _PyUnicode_New(1); | ||||
| 		if (!unicode) | ||||
| 		    return NULL; | ||||
| 		unicode->str[0] = Py_CHARMASK(*u); | ||||
| 		unicode_latin1[Py_CHARMASK(*u)] = unicode; | ||||
| 	    } | ||||
| 	    Py_INCREF(unicode); | ||||
| 	    return (PyObject *)unicode; | ||||
| 	} | ||||
| 
 | ||||
|         return PyUnicode_DecodeUTF8(u, size, NULL); | ||||
|     } | ||||
| 
 | ||||
|     unicode = _PyUnicode_New(size); | ||||
|     if (!unicode) | ||||
|         return NULL; | ||||
| 
 | ||||
|     return (PyObject *)unicode; | ||||
| } | ||||
| 
 | ||||
| PyObject *PyUnicode_FromString(const char *u) | ||||
| { | ||||
|     size_t size = strlen(u); | ||||
|     if (size > PY_SSIZE_T_MAX) { | ||||
|         PyErr_SetString(PyExc_OverflowError, "input too long"); | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     return PyUnicode_FromStringAndSize(u, size); | ||||
| } | ||||
| 
 | ||||
| #ifdef HAVE_WCHAR_H | ||||
| 
 | ||||
| PyObject *PyUnicode_FromWideChar(register const wchar_t *w, | ||||
|  | @ -429,6 +480,420 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w, | |||
|     return (PyObject *)unicode; | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c) | ||||
| { | ||||
| 	*fmt++ = '%'; | ||||
| 	if (width) { | ||||
| 		if (zeropad) | ||||
| 			*fmt++ = '0'; | ||||
| 		fmt += sprintf(fmt, "%d", width); | ||||
| 	} | ||||
| 	if (precision) | ||||
| 		fmt += sprintf(fmt, ".%d", precision); | ||||
| 	if (longflag) | ||||
| 		*fmt++ = 'l'; | ||||
| 	else if (size_tflag) { | ||||
| 		char *f = PY_FORMAT_SIZE_T; | ||||
| 		while (*f) | ||||
| 			*fmt++ = *f++; | ||||
| 	} | ||||
| 	*fmt++ = c; | ||||
| 	*fmt = '\0'; | ||||
| } | ||||
| 
 | ||||
| #define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;} | ||||
| 
 | ||||
| PyObject * | ||||
| PyUnicode_FromFormatV(const char *format, va_list vargs) | ||||
| { | ||||
| 	va_list count; | ||||
| 	Py_ssize_t callcount = 0; | ||||
| 	PyObject **callresults = NULL; | ||||
| 	PyObject **callresult = NULL; | ||||
| 	Py_ssize_t n = 0; | ||||
| 	int width = 0; | ||||
| 	int precision = 0; | ||||
| 	int zeropad; | ||||
| 	const char* f; | ||||
| 	Py_UNICODE *s; | ||||
| 	PyObject *string; | ||||
| 	/* used by sprintf */ | ||||
| 	char buffer[21]; | ||||
| 	/* use abuffer instead of buffer, if we need more space
 | ||||
| 	 * (which can happen if there's a format specifier with width). */ | ||||
| 	char *abuffer = NULL; | ||||
| 	char *realbuffer; | ||||
| 	Py_ssize_t abuffersize = 0; | ||||
| 	char fmt[60]; /* should be enough for %0width.precisionld */ | ||||
| 	const char *copy; | ||||
| 
 | ||||
| #ifdef VA_LIST_IS_ARRAY | ||||
| 	Py_MEMCPY(count, vargs, sizeof(va_list)); | ||||
| #else | ||||
| #ifdef  __va_copy | ||||
| 	__va_copy(count, vargs); | ||||
| #else | ||||
| 	count = vargs; | ||||
| #endif | ||||
| #endif | ||||
| 	/* step 1: count the number of %S/%R format specifications
 | ||||
| 	 * (we call PyObject_Str()/PyObject_Repr() for these objects | ||||
| 	 * once during step 3 and put the result in an array) */ | ||||
| 	for (f = format; *f; f++) { | ||||
| 		if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R')) | ||||
| 			++callcount; | ||||
| 	} | ||||
| 	/* step 2: allocate memory for the results of
 | ||||
| 	 * PyObject_Str()/PyObject_Repr() calls */ | ||||
| 	if (callcount) { | ||||
| 		callresults = PyMem_Malloc(sizeof(PyObject *)*callcount); | ||||
| 		if (!callresults) { | ||||
| 			PyErr_NoMemory(); | ||||
| 			return NULL; | ||||
| 		} | ||||
| 		callresult = callresults; | ||||
| 	} | ||||
| 	/* step 3: figure out how large a buffer we need */ | ||||
| 	for (f = format; *f; f++) { | ||||
| 		if (*f == '%') { | ||||
| 			const char* p = f; | ||||
| 			width = 0; | ||||
| 			while (isdigit(*f)) | ||||
| 				width = (width*10) + *f++ - '0'; | ||||
| 			while (*++f && *f != '%' && !isalpha(*f)) | ||||
| 				; | ||||
| 
 | ||||
| 			/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 | ||||
| 			 * they don't affect the amount of space we reserve. | ||||
| 			 */ | ||||
| 			if ((*f == 'l' || *f == 'z') && | ||||
| 					(f[1] == 'd' || f[1] == 'u')) | ||||
|                                 ++f; | ||||
| 
 | ||||
| 			switch (*f) { | ||||
| 			case 'c': | ||||
| 				(void)va_arg(count, int); | ||||
| 				/* fall through... */ | ||||
| 			case '%': | ||||
| 				n++; | ||||
| 				break; | ||||
| 			case 'd': case 'u': case 'i': case 'x': | ||||
| 				(void) va_arg(count, int); | ||||
| 				/* 20 bytes is enough to hold a 64-bit
 | ||||
| 				   integer.  Decimal takes the most space. | ||||
| 				   This isn't enough for octal. | ||||
| 				   If a width is specified we need more | ||||
| 				   (which we allocate later). */ | ||||
| 				if (width < 20) | ||||
| 					width = 20; | ||||
| 				n += width; | ||||
| 				if (abuffersize < width) | ||||
| 					abuffersize = width; | ||||
| 				break; | ||||
| 			case 's': | ||||
| 			{ | ||||
| 				/* UTF-8 */ | ||||
| 				unsigned char*s; | ||||
| 				s = va_arg(count, unsigned char*); | ||||
| 				while (*s) { | ||||
| 					if (*s < 128) { | ||||
| 						n++; s++; | ||||
| 					} else if (*s < 0xc0) { | ||||
| 						/* invalid UTF-8 */ | ||||
| 						n++; s++; | ||||
| 					} else if (*s < 0xc0) { | ||||
| 						n++; | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; | ||||
| 					} else if (*s < 0xe0) { | ||||
| 						n++; | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; | ||||
| 					} else { | ||||
| 						#ifdef Py_UNICODE_WIDE | ||||
| 						n++; | ||||
| 						#else | ||||
| 						n+=2; | ||||
| 						#endif | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; if(!*s)break; | ||||
| 						s++; | ||||
| 					} | ||||
| 				} | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'U': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(count, PyObject *); | ||||
| 				assert(obj && PyUnicode_Check(obj)); | ||||
| 				n += PyUnicode_GET_SIZE(obj); | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'V': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(count, PyObject *); | ||||
| 				const char *str = va_arg(count, const char *); | ||||
| 				assert(obj || str); | ||||
| 				assert(!obj || PyUnicode_Check(obj)); | ||||
| 				if (obj) | ||||
| 					n += PyUnicode_GET_SIZE(obj); | ||||
| 				else | ||||
| 					n += strlen(str); | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'S': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(count, PyObject *); | ||||
| 				PyObject *str; | ||||
| 				assert(obj); | ||||
| 				str = PyObject_Str(obj); | ||||
| 				if (!str) | ||||
| 					goto fail; | ||||
| 				n += PyUnicode_GET_SIZE(str); | ||||
| 				/* Remember the str and switch to the next slot */ | ||||
| 				*callresult++ = str; | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'R': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(count, PyObject *); | ||||
| 				PyObject *repr; | ||||
| 				assert(obj); | ||||
| 				repr = PyObject_Repr(obj); | ||||
| 				if (!repr) | ||||
| 					goto fail; | ||||
| 				n += PyUnicode_GET_SIZE(repr); | ||||
| 				/* Remember the repr and switch to the next slot */ | ||||
| 				*callresult++ = repr; | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'p': | ||||
| 				(void) va_arg(count, int); | ||||
| 				/* maximum 64-bit pointer representation:
 | ||||
| 				 * 0xffffffffffffffff | ||||
| 				 * so 19 characters is enough. | ||||
| 				 * XXX I count 18 -- what's the extra for? | ||||
| 				 */ | ||||
| 				n += 19; | ||||
| 				break; | ||||
| 			default: | ||||
| 				/* if we stumble upon an unknown
 | ||||
| 				   formatting code, copy the rest of | ||||
| 				   the format string to the output | ||||
| 				   string. (we cannot just skip the | ||||
| 				   code, since there's no way to know | ||||
| 				   what's in the argument list) */ | ||||
| 				n += strlen(p); | ||||
| 				goto expand; | ||||
| 			} | ||||
| 		} else | ||||
| 			n++; | ||||
| 	} | ||||
|  expand: | ||||
| 	if (abuffersize > 20) { | ||||
| 		abuffer = PyMem_Malloc(abuffersize); | ||||
| 		if (!abuffer) { | ||||
| 			PyErr_NoMemory(); | ||||
| 			goto fail; | ||||
| 		} | ||||
| 		realbuffer = abuffer; | ||||
| 	} | ||||
| 	else | ||||
| 		realbuffer = buffer; | ||||
| 	/* step 4: fill the buffer */ | ||||
| 	/* Since we've analyzed how much space we need for the worst case,
 | ||||
| 	   we don't have to resize the string. | ||||
| 	   There can be no errors beyond this point. */ | ||||
| 	string = PyUnicode_FromUnicode(NULL, n); | ||||
| 	if (!string) | ||||
| 		goto fail; | ||||
| 
 | ||||
| 	s = PyUnicode_AS_UNICODE(string); | ||||
| 	callresult = callresults; | ||||
| 
 | ||||
| 	for (f = format; *f; f++) { | ||||
| 		if (*f == '%') { | ||||
| 			const char* p = f++; | ||||
| 			int longflag = 0; | ||||
| 			int size_tflag = 0; | ||||
| 			zeropad = (*f == '0'); | ||||
| 			/* parse the width.precision part */ | ||||
| 			width = 0; | ||||
| 			while (isdigit(*f)) | ||||
| 				width = (width*10) + *f++ - '0'; | ||||
| 			precision = 0; | ||||
| 			if (*f == '.') { | ||||
| 				f++; | ||||
| 				while (isdigit(*f)) | ||||
| 					precision = (precision*10) + *f++ - '0'; | ||||
| 			} | ||||
| 			/* handle the long flag, but only for %ld and %lu.
 | ||||
| 			   others can be added when necessary. */ | ||||
| 			if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { | ||||
| 				longflag = 1; | ||||
| 				++f; | ||||
| 			} | ||||
| 			/* handle the size_t flag. */ | ||||
| 			if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { | ||||
| 				size_tflag = 1; | ||||
| 				++f; | ||||
| 			} | ||||
| 
 | ||||
| 			switch (*f) { | ||||
| 			case 'c': | ||||
| 				*s++ = va_arg(vargs, int); | ||||
| 				break; | ||||
| 			case 'd': | ||||
| 				makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd'); | ||||
| 				if (longflag) | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, long)); | ||||
| 				else if (size_tflag) | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t)); | ||||
| 				else | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, int)); | ||||
| 				appendstring(realbuffer); | ||||
| 				break; | ||||
| 			case 'u': | ||||
| 				makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u'); | ||||
| 				if (longflag) | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, unsigned long)); | ||||
| 				else if (size_tflag) | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, size_t)); | ||||
| 				else | ||||
| 					sprintf(realbuffer, fmt, va_arg(vargs, unsigned int)); | ||||
| 				appendstring(realbuffer); | ||||
| 				break; | ||||
| 			case 'i': | ||||
| 				makefmt(fmt, 0, 0, zeropad, width, precision, 'i'); | ||||
| 				sprintf(realbuffer, fmt, va_arg(vargs, int)); | ||||
| 				appendstring(realbuffer); | ||||
| 				break; | ||||
| 			case 'x': | ||||
| 				makefmt(fmt, 0, 0, zeropad, width, precision, 'x'); | ||||
| 				sprintf(realbuffer, fmt, va_arg(vargs, int)); | ||||
| 				appendstring(realbuffer); | ||||
| 				break; | ||||
| 			case 's': | ||||
| 			{ | ||||
| 				/* Parameter must be UTF-8 encoded.
 | ||||
| 				   In case of encoding errors, use | ||||
| 				   the replacement character. */ | ||||
| 				PyObject *u; | ||||
| 				p = va_arg(vargs, char*); | ||||
| 				u = PyUnicode_DecodeUTF8(p, strlen(p),  | ||||
| 							 "replace"); | ||||
| 				if (!u) | ||||
| 					goto fail; | ||||
| 				Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), | ||||
| 						PyUnicode_GET_SIZE(u)); | ||||
| 				s += PyUnicode_GET_SIZE(u); | ||||
| 				Py_DECREF(u); | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'U': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(vargs, PyObject *); | ||||
| 				Py_ssize_t size = PyUnicode_GET_SIZE(obj); | ||||
| 				Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); | ||||
| 				s += size; | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'V': | ||||
| 			{ | ||||
| 				PyObject *obj = va_arg(vargs, PyObject *); | ||||
| 				const char *str = va_arg(vargs, const char *); | ||||
| 				if (obj) { | ||||
| 					Py_ssize_t size = PyUnicode_GET_SIZE(obj); | ||||
| 					Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); | ||||
| 					s += size; | ||||
| 				} else { | ||||
| 					appendstring(str); | ||||
| 				} | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'S': | ||||
| 			case 'R': | ||||
| 			{ | ||||
| 				Py_UNICODE *ucopy; | ||||
| 				Py_ssize_t usize; | ||||
| 				Py_ssize_t upos; | ||||
| 				/* unused, since we already have the result */ | ||||
| 				(void) va_arg(vargs, PyObject *); | ||||
| 				ucopy = PyUnicode_AS_UNICODE(*callresult); | ||||
| 				usize = PyUnicode_GET_SIZE(*callresult); | ||||
| 				for (upos = 0; upos<usize;) | ||||
| 					*s++ = ucopy[upos++]; | ||||
| 				/* We're done with the unicode()/repr() => forget it */ | ||||
| 				Py_DECREF(*callresult); | ||||
| 				/* switch to next unicode()/repr() result */ | ||||
| 				++callresult; | ||||
| 				break; | ||||
| 			} | ||||
| 			case 'p': | ||||
| 				sprintf(buffer, "%p", va_arg(vargs, void*)); | ||||
| 				/* %p is ill-defined:  ensure leading 0x. */ | ||||
| 				if (buffer[1] == 'X') | ||||
| 					buffer[1] = 'x'; | ||||
| 				else if (buffer[1] != 'x') { | ||||
| 					memmove(buffer+2, buffer, strlen(buffer)+1); | ||||
| 					buffer[0] = '0'; | ||||
| 					buffer[1] = 'x'; | ||||
| 				} | ||||
| 				appendstring(buffer); | ||||
| 				break; | ||||
| 			case '%': | ||||
| 				*s++ = '%'; | ||||
| 				break; | ||||
| 			default: | ||||
| 				appendstring(p); | ||||
| 				goto end; | ||||
| 			} | ||||
| 		} else | ||||
| 			*s++ = *f; | ||||
| 	} | ||||
| 
 | ||||
|  end: | ||||
| 	if (callresults) | ||||
| 		PyMem_Free(callresults); | ||||
| 	if (abuffer) | ||||
| 		PyMem_Free(abuffer); | ||||
| 	_PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string)); | ||||
| 	return string; | ||||
|  fail: | ||||
| 	if (callresults) { | ||||
| 		PyObject **callresult2 = callresults; | ||||
| 		while (callresult2 < callresult) { | ||||
| 			Py_DECREF(*callresult2); | ||||
| 			++callresult2; | ||||
| 		} | ||||
| 		PyMem_Free(callresults); | ||||
| 	} | ||||
| 	if (abuffer) | ||||
| 		PyMem_Free(abuffer); | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| #undef appendstring | ||||
| 
 | ||||
| PyObject * | ||||
| PyUnicode_FromFormat(const char *format, ...) | ||||
| { | ||||
| 	PyObject* ret; | ||||
| 	va_list vargs; | ||||
| 
 | ||||
| #ifdef HAVE_STDARG_PROTOTYPES | ||||
| 	va_start(vargs, format); | ||||
| #else | ||||
| 	va_start(vargs); | ||||
| #endif | ||||
| 	ret = PyUnicode_FromFormatV(format, vargs); | ||||
| 	va_end(vargs); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode, | ||||
| 				wchar_t *w, | ||||
| 				Py_ssize_t size) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Christian Heimes
						Christian Heimes