mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Fix PEP 263 code for --without-unicode builds. Fixes #591943.
This commit is contained in:
		
							parent
							
								
									63d5bead18
								
							
						
					
					
						commit
						019934b3cc
					
				
					 2 changed files with 28 additions and 0 deletions
				
			
		|  | @ -256,11 +256,18 @@ check_coding_spec(const char* line, int size, struct tok_state *tok, | ||||||
| 			    strcmp(cs, "iso-8859-1") == 0) { | 			    strcmp(cs, "iso-8859-1") == 0) { | ||||||
| 				tok->encoding = cs; | 				tok->encoding = cs; | ||||||
| 			} else { | 			} else { | ||||||
|  | #ifdef Py_USING_UNICODE | ||||||
| 				r = set_readline(tok, cs); | 				r = set_readline(tok, cs); | ||||||
| 				if (r) { | 				if (r) { | ||||||
| 					tok->encoding = cs; | 					tok->encoding = cs; | ||||||
| 					tok->decoding_state = -1; | 					tok->decoding_state = -1; | ||||||
| 				} | 				} | ||||||
|  | #else | ||||||
|  |                                 /* Without Unicode support, we cannot
 | ||||||
|  |                                    process the coding spec. Since there | ||||||
|  |                                    won't be any Unicode literals, that | ||||||
|  |                                    won't matter. */ | ||||||
|  | #endif | ||||||
| 			} | 			} | ||||||
| 		} else {	/* then, compare cs with BOM */ | 		} else {	/* then, compare cs with BOM */ | ||||||
| 			r = (strcmp(tok->encoding, cs) == 0); | 			r = (strcmp(tok->encoding, cs) == 0); | ||||||
|  | @ -317,6 +324,10 @@ check_bom(int get_char(struct tok_state *), | ||||||
| static char * | static char * | ||||||
| fp_readl(char *s, int size, struct tok_state *tok) | fp_readl(char *s, int size, struct tok_state *tok) | ||||||
| { | { | ||||||
|  | #ifndef Py_USING_UNICODE | ||||||
|  | 	/* In a non-Unicode build, this should never be called. */ | ||||||
|  | 	abort(); | ||||||
|  | #else | ||||||
| 	PyObject* utf8; | 	PyObject* utf8; | ||||||
| 	PyObject* buf = tok->decoding_buffer; | 	PyObject* buf = tok->decoding_buffer; | ||||||
| 	if (buf == NULL) { | 	if (buf == NULL) { | ||||||
|  | @ -338,6 +349,7 @@ fp_readl(char *s, int size, struct tok_state *tok) | ||||||
| 		if (s[0] == '\0') return NULL; /* EOF */ | 		if (s[0] == '\0') return NULL; /* EOF */ | ||||||
| 		return s; | 		return s; | ||||||
| 	} | 	} | ||||||
|  | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Set the readline function for TOK to a StreamReader's
 | /* Set the readline function for TOK to a StreamReader's
 | ||||||
|  | @ -487,6 +499,7 @@ static int buf_setreadl(struct tok_state *tok, const char* enc) { | ||||||
| /* Return a UTF-8 encoding Python string object from the
 | /* Return a UTF-8 encoding Python string object from the
 | ||||||
|    C byte string STR, which is encoded with ENC. */ |    C byte string STR, which is encoded with ENC. */ | ||||||
| 
 | 
 | ||||||
|  | #ifdef Py_USING_UNICODE | ||||||
| static PyObject * | static PyObject * | ||||||
| translate_into_utf8(const char* str, const char* enc) { | translate_into_utf8(const char* str, const char* enc) { | ||||||
| 	PyObject *utf8; | 	PyObject *utf8; | ||||||
|  | @ -497,6 +510,7 @@ translate_into_utf8(const char* str, const char* enc) { | ||||||
| 	Py_DECREF(buf); | 	Py_DECREF(buf); | ||||||
| 	return utf8; | 	return utf8; | ||||||
| } | } | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| /* Decode a byte string STR for use as the buffer of TOK.
 | /* Decode a byte string STR for use as the buffer of TOK.
 | ||||||
|    Look for encoding declarations inside STR, and record them |    Look for encoding declarations inside STR, and record them | ||||||
|  | @ -514,12 +528,14 @@ decode_str(const char *str, struct tok_state *tok) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	str = tok->str;		/* string after BOM if any */ | 	str = tok->str;		/* string after BOM if any */ | ||||||
| 	assert(str); | 	assert(str); | ||||||
|  | #ifdef Py_USING_UNICODE | ||||||
| 	if (tok->enc != NULL) { | 	if (tok->enc != NULL) { | ||||||
| 		utf8 = translate_into_utf8(str, tok->enc); | 		utf8 = translate_into_utf8(str, tok->enc); | ||||||
| 		if (utf8 == NULL) | 		if (utf8 == NULL) | ||||||
| 			return NULL; | 			return NULL; | ||||||
| 		str = PyString_AsString(utf8); | 		str = PyString_AsString(utf8); | ||||||
| 	} | 	} | ||||||
|  | #endif | ||||||
| 	for (s = str;; s++) { | 	for (s = str;; s++) { | ||||||
| 		if (*s == '\0') break; | 		if (*s == '\0') break; | ||||||
| 		else if (*s == '\n') { | 		else if (*s == '\n') { | ||||||
|  | @ -530,6 +546,7 @@ decode_str(const char *str, struct tok_state *tok) | ||||||
| 	tok->enc = NULL; | 	tok->enc = NULL; | ||||||
| 	if (!check_coding_spec(str, s - str, tok, buf_setreadl)) | 	if (!check_coding_spec(str, s - str, tok, buf_setreadl)) | ||||||
| 		return NULL; | 		return NULL; | ||||||
|  | #ifdef Py_USING_UNICODE | ||||||
| 	if (tok->enc != NULL) { | 	if (tok->enc != NULL) { | ||||||
| 		assert(utf8 == NULL); | 		assert(utf8 == NULL); | ||||||
| 		utf8 = translate_into_utf8(str, tok->enc); | 		utf8 = translate_into_utf8(str, tok->enc); | ||||||
|  | @ -537,6 +554,7 @@ decode_str(const char *str, struct tok_state *tok) | ||||||
| 			return NULL; | 			return NULL; | ||||||
| 		str = PyString_AsString(utf8); | 		str = PyString_AsString(utf8); | ||||||
| 	} | 	} | ||||||
|  | #endif | ||||||
| 	assert(tok->decoding_buffer == NULL); | 	assert(tok->decoding_buffer == NULL); | ||||||
| 	tok->decoding_buffer = utf8; /* CAUTION */ | 	tok->decoding_buffer = utf8; /* CAUTION */ | ||||||
| 	return str; | 	return str; | ||||||
|  |  | ||||||
|  | @ -1185,6 +1185,9 @@ parsenumber(struct compiling *co, char *s) | ||||||
| static PyObject * | static PyObject * | ||||||
| decode_utf8(char **sPtr, char *end, char* encoding) | decode_utf8(char **sPtr, char *end, char* encoding) | ||||||
| { | { | ||||||
|  | #ifndef Py_USING_UNICODE | ||||||
|  | 	abort(); | ||||||
|  | #else | ||||||
| 	PyObject *u, *v; | 	PyObject *u, *v; | ||||||
| 	char *s, *t; | 	char *s, *t; | ||||||
| 	t = s = *sPtr; | 	t = s = *sPtr; | ||||||
|  | @ -1197,6 +1200,7 @@ decode_utf8(char **sPtr, char *end, char* encoding) | ||||||
| 	v = PyUnicode_AsEncodedString(u, encoding, NULL); | 	v = PyUnicode_AsEncodedString(u, encoding, NULL); | ||||||
| 	Py_DECREF(u); | 	Py_DECREF(u); | ||||||
| 	return v; | 	return v; | ||||||
|  | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static PyObject * | static PyObject * | ||||||
|  | @ -1312,12 +1316,18 @@ parsestr(struct compiling *com, char *s) | ||||||
| 			 strcmp(encoding, "iso-8859-1") != 0); | 			 strcmp(encoding, "iso-8859-1") != 0); | ||||||
| 	if (rawmode || strchr(s, '\\') == NULL) { | 	if (rawmode || strchr(s, '\\') == NULL) { | ||||||
| 		if (need_encoding) { | 		if (need_encoding) { | ||||||
|  | #ifndef Py_USING_UNICODE | ||||||
|  | 			/* This should not happen - we never see any other
 | ||||||
|  | 			   encoding. */ | ||||||
|  | 			abort(); | ||||||
|  | #else | ||||||
| 			PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL); | 			PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL); | ||||||
| 			if (u == NULL) | 			if (u == NULL) | ||||||
| 				return NULL; | 				return NULL; | ||||||
| 			v = PyUnicode_AsEncodedString(u, encoding, NULL); | 			v = PyUnicode_AsEncodedString(u, encoding, NULL); | ||||||
| 			Py_DECREF(u); | 			Py_DECREF(u); | ||||||
| 			return v; | 			return v; | ||||||
|  | #endif | ||||||
| 		} else { | 		} else { | ||||||
| 			return PyString_FromStringAndSize(s, len); | 			return PyString_FromStringAndSize(s, len); | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Martin v. Löwis
						Martin v. Löwis