mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Patch #1031213: Decode source line in SyntaxErrors back to its original
source encoding. Will backport to 2.5.
This commit is contained in:
		
							parent
							
								
									58bd49f5fe
								
							
						
					
					
						commit
						a5136196bc
					
				
					 6 changed files with 107 additions and 5 deletions
				
			
		| 
						 | 
					@ -155,6 +155,32 @@ def testWithAss(self):
 | 
				
			||||||
        self.assertEquals(dct.get('result'), 1)
 | 
					        self.assertEquals(dct.get('result'), 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _testErrEnc(self, src, text, offset):
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            compile(src, "", "exec")
 | 
				
			||||||
 | 
					        except SyntaxError, e:
 | 
				
			||||||
 | 
					            self.assertEquals(e.offset, offset)
 | 
				
			||||||
 | 
					            self.assertEquals(e.text, text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def testSourceCodeEncodingsError(self):
 | 
				
			||||||
 | 
					        # Test SyntaxError with encoding definition
 | 
				
			||||||
 | 
					        sjis = "print '\x83\x70\x83\x43\x83\x5c\x83\x93', '\n"
 | 
				
			||||||
 | 
					        ascii = "print '12345678', '\n"
 | 
				
			||||||
 | 
					        encdef = "#! -*- coding: ShiftJIS -*-\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # ascii source without encdef
 | 
				
			||||||
 | 
					        self._testErrEnc(ascii, ascii, 19)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # ascii source with encdef
 | 
				
			||||||
 | 
					        self._testErrEnc(encdef+ascii, ascii, 19)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # non-ascii source with encdef
 | 
				
			||||||
 | 
					        self._testErrEnc(encdef+sjis, sjis, 19)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # ShiftJIS source without encdef
 | 
				
			||||||
 | 
					        self._testErrEnc(sjis, sjis, 19)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
 | 
					NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
###############################################################################
 | 
					###############################################################################
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -320,6 +320,7 @@ Lars Immisch
 | 
				
			||||||
Tony Ingraldi
 | 
					Tony Ingraldi
 | 
				
			||||||
John Interrante
 | 
					John Interrante
 | 
				
			||||||
Bob Ippolito
 | 
					Bob Ippolito
 | 
				
			||||||
 | 
					Atsuo Ishimoto
 | 
				
			||||||
Ben Jackson
 | 
					Ben Jackson
 | 
				
			||||||
Paul Jackson
 | 
					Paul Jackson
 | 
				
			||||||
David Jacobs
 | 
					David Jacobs
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1?
 | 
				
			||||||
Core and builtins
 | 
					Core and builtins
 | 
				
			||||||
-----------------
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Patch #1031213: Decode source line in SyntaxErrors back to its original source
 | 
				
			||||||
 | 
					  encoding.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Py_ssize_t fields work in structmember when HAVE_LONG_LONG is not defined.
 | 
					- Py_ssize_t fields work in structmember when HAVE_LONG_LONG is not defined.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- PEP 3123: Provide forward compatibility with Python 3.0, while keeping
 | 
					- PEP 3123: Provide forward compatibility with Python 3.0, while keeping
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -218,17 +218,25 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 | 
				
			||||||
			err_ret->error = E_EOF;
 | 
								err_ret->error = E_EOF;
 | 
				
			||||||
		err_ret->lineno = tok->lineno;
 | 
							err_ret->lineno = tok->lineno;
 | 
				
			||||||
		if (tok->buf != NULL) {
 | 
							if (tok->buf != NULL) {
 | 
				
			||||||
 | 
								char *text = NULL;
 | 
				
			||||||
			size_t len;
 | 
								size_t len;
 | 
				
			||||||
			assert(tok->cur - tok->buf < INT_MAX);
 | 
								assert(tok->cur - tok->buf < INT_MAX);
 | 
				
			||||||
			err_ret->offset = (int)(tok->cur - tok->buf);
 | 
								err_ret->offset = (int)(tok->cur - tok->buf);
 | 
				
			||||||
			len = tok->inp - tok->buf;
 | 
								len = tok->inp - tok->buf;
 | 
				
			||||||
			err_ret->text = (char *) PyObject_MALLOC(len + 1);
 | 
					#ifdef Py_USING_UNICODE
 | 
				
			||||||
			if (err_ret->text != NULL) {
 | 
								text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
								if (text == NULL) {
 | 
				
			||||||
 | 
									text = (char *) PyObject_MALLOC(len + 1);
 | 
				
			||||||
 | 
									if (text != NULL) {
 | 
				
			||||||
					if (len > 0)
 | 
										if (len > 0)
 | 
				
			||||||
					strncpy(err_ret->text, tok->buf, len);
 | 
											strncpy(text, tok->buf, len);
 | 
				
			||||||
				err_ret->text[len] = '\0';
 | 
										text[len] = '\0';
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								err_ret->text = text;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	} else if (tok->encoding != NULL) {
 | 
						} else if (tok->encoding != NULL) {
 | 
				
			||||||
		node* r = PyNode_New(encoding_decl);
 | 
							node* r = PyNode_New(encoding_decl);
 | 
				
			||||||
		if (!r) {
 | 
							if (!r) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1522,6 +1522,68 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
 | 
				
			||||||
	return result;
 | 
						return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This function is only called from parsetok. However, it cannot live
 | 
				
			||||||
 | 
					   there, as it must be empty for PGEN, and we can check for PGEN only
 | 
				
			||||||
 | 
					   in this file. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef PGEN
 | 
				
			||||||
 | 
					char*
 | 
				
			||||||
 | 
					PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					static PyObject *
 | 
				
			||||||
 | 
					dec_utf8(const char *enc, const char *text, size_t len) {
 | 
				
			||||||
 | 
						PyObject *ret = NULL;	
 | 
				
			||||||
 | 
						PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
 | 
				
			||||||
 | 
						if (unicode_text) {
 | 
				
			||||||
 | 
							ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
 | 
				
			||||||
 | 
							Py_DECREF(unicode_text);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (!ret) {
 | 
				
			||||||
 | 
							PyErr_Print();
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					char *
 | 
				
			||||||
 | 
					PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						char *text = NULL;
 | 
				
			||||||
 | 
						if (tok->encoding) {
 | 
				
			||||||
 | 
							/* convert source to original encoding */
 | 
				
			||||||
 | 
							PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
 | 
				
			||||||
 | 
							if (lineobj != NULL) {
 | 
				
			||||||
 | 
								int linelen = PyString_Size(lineobj);
 | 
				
			||||||
 | 
								const char *line = PyString_AsString(lineobj);
 | 
				
			||||||
 | 
								text = PyObject_MALLOC(linelen + 1);
 | 
				
			||||||
 | 
								if (text != NULL && line != NULL) {
 | 
				
			||||||
 | 
									if (linelen)
 | 
				
			||||||
 | 
										strncpy(text, line, linelen);
 | 
				
			||||||
 | 
									text[linelen] = '\0';
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								Py_DECREF(lineobj);
 | 
				
			||||||
 | 
										
 | 
				
			||||||
 | 
								/* adjust error offset */
 | 
				
			||||||
 | 
								if (*offset > 1) {
 | 
				
			||||||
 | 
									PyObject *offsetobj = dec_utf8(tok->encoding, 
 | 
				
			||||||
 | 
												       tok->buf, *offset-1);
 | 
				
			||||||
 | 
									if (offsetobj) {
 | 
				
			||||||
 | 
										*offset = PyString_Size(offsetobj) + 1;
 | 
				
			||||||
 | 
										Py_DECREF(offsetobj);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return text;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								   
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef Py_DEBUG
 | 
					#ifdef Py_DEBUG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,6 +58,8 @@ extern struct tok_state *PyTokenizer_FromString(const char *);
 | 
				
			||||||
extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
 | 
					extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
 | 
				
			||||||
extern void PyTokenizer_Free(struct tok_state *);
 | 
					extern void PyTokenizer_Free(struct tok_state *);
 | 
				
			||||||
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 | 
					extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 | 
				
			||||||
 | 
					extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, 
 | 
				
			||||||
 | 
										  int len, int *offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __cplusplus
 | 
					#ifdef __cplusplus
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue