mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	Merged revisions 76230 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines

  fix several compile() issues by translating newlines in the tokenizer
........
This commit is contained in:
		
							parent
							
								
									a1d23326b1
								
							
						
					
					
						commit
						aeaa592516
					
				
					 8 changed files with 106 additions and 35 deletions
				
			
		| 
						 | 
					@ -176,11 +176,15 @@ are always available.  They are listed here in alphabetical order.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   .. note::
 | 
					   .. note::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      When compiling a string with multi-line statements, line endings must be
 | 
					      When compiling a string with multi-line statements in ``'single'`` or
 | 
				
			||||||
      represented by a single newline character (``'\n'``), and the input must
 | 
					      ``'eval'`` mode, input must be terminated by at least one newline
 | 
				
			||||||
      be terminated by at least one newline character.  If line endings are
 | 
					      character.  This is to facilitate detection of incomplete and complete
 | 
				
			||||||
      represented by ``'\r\n'``, use :meth:`str.replace` to change them into
 | 
					      statements in the :mod:`code` module.
 | 
				
			||||||
      ``'\n'``.
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   .. versionchanged:: 3.2
 | 
				
			||||||
 | 
					      Allowed use of Windows and Mac newlines.  Also input in ``'exec'`` mode
 | 
				
			||||||
 | 
					      does not have to end in a newline anymore.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. function:: complex([real[, imag]])
 | 
					.. function:: complex([real[, imag]])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -295,10 +295,6 @@ def test_filename(self):
 | 
				
			||||||
        self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
 | 
					        self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
 | 
				
			||||||
                             compile("a = 1\n", "def", 'single').co_filename)
 | 
					                             compile("a = 1\n", "def", 'single').co_filename)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_no_universal_newlines(self):
 | 
					 | 
				
			||||||
        code = compile_command("'\rfoo\r'", symbol='eval')
 | 
					 | 
				
			||||||
        self.assertEqual(eval(code), '\rfoo\r')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_main():
 | 
					def test_main():
 | 
				
			||||||
    run_unittest(CodeopTests)
 | 
					    run_unittest(CodeopTests)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,6 +5,19 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestSpecifics(unittest.TestCase):
 | 
					class TestSpecifics(unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_no_ending_newline(self):
 | 
				
			||||||
 | 
					        compile("hi", "<test>", "exec")
 | 
				
			||||||
 | 
					        compile("hi\r", "<test>", "exec")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_empty(self):
 | 
				
			||||||
 | 
					        compile("", "<test>", "exec")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_other_newlines(self):
 | 
				
			||||||
 | 
					        compile("\r\n", "<test>", "exec")
 | 
				
			||||||
 | 
					        compile("\r", "<test>", "exec")
 | 
				
			||||||
 | 
					        compile("hi\r\nstuff\r\ndef f():\n    pass\r", "<test>", "exec")
 | 
				
			||||||
 | 
					        compile("this_is\rreally_old_mac\rdef f():\n    pass", "<test>", "exec")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_debug_assignment(self):
 | 
					    def test_debug_assignment(self):
 | 
				
			||||||
        # catch assignments to __debug__
 | 
					        # catch assignments to __debug__
 | 
				
			||||||
        self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
 | 
					        self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -237,9 +237,9 @@ def walk(tree):
 | 
				
			||||||
            (14, '+', 2, 13),
 | 
					            (14, '+', 2, 13),
 | 
				
			||||||
            (2, '1', 2, 15),
 | 
					            (2, '1', 2, 15),
 | 
				
			||||||
            (4, '', 2, 16),
 | 
					            (4, '', 2, 16),
 | 
				
			||||||
            (6, '', 2, -1),
 | 
					            (6, '', 3, -1),
 | 
				
			||||||
            (4, '', 2, -1),
 | 
					            (4, '', 3, -1),
 | 
				
			||||||
            (0, '', 2, -1)],
 | 
					            (0, '', 3, -1)],
 | 
				
			||||||
                         terminals)
 | 
					                         terminals)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_extended_unpacking(self):
 | 
					    def test_extended_unpacking(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -26,7 +26,7 @@ def test_issue2301(self):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
 | 
					            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
 | 
				
			||||||
        except SyntaxError as v:
 | 
					        except SyntaxError as v:
 | 
				
			||||||
            self.assertEquals(v.text, "print '\u5e74'")
 | 
					            self.assertEquals(v.text, "print '\u5e74'\n")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.fail()
 | 
					            self.fail()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
 | 
				
			||||||
		          perrdetail *err_ret, int *flags)
 | 
							          perrdetail *err_ret, int *flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct tok_state *tok;
 | 
						struct tok_state *tok;
 | 
				
			||||||
 | 
						int exec_input = start == file_input;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	initerr(err_ret, filename);
 | 
						initerr(err_ret, filename);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (*flags & PyPARSE_IGNORE_COOKIE)
 | 
						if (*flags & PyPARSE_IGNORE_COOKIE)
 | 
				
			||||||
		tok = PyTokenizer_FromUTF8(s);
 | 
							tok = PyTokenizer_FromUTF8(s, exec_input);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		tok = PyTokenizer_FromString(s);
 | 
							tok = PyTokenizer_FromString(s, exec_input);
 | 
				
			||||||
	if (tok == NULL) {
 | 
						if (tok == NULL) {
 | 
				
			||||||
		err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
 | 
							err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -119,6 +119,7 @@ tok_new(void)
 | 
				
			||||||
	tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
 | 
						tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
 | 
				
			||||||
	tok->done = E_OK;
 | 
						tok->done = E_OK;
 | 
				
			||||||
	tok->fp = NULL;
 | 
						tok->fp = NULL;
 | 
				
			||||||
 | 
						tok->input = NULL;
 | 
				
			||||||
	tok->tabsize = TABSIZE;
 | 
						tok->tabsize = TABSIZE;
 | 
				
			||||||
	tok->indent = 0;
 | 
						tok->indent = 0;
 | 
				
			||||||
	tok->indstack[0] = 0;
 | 
						tok->indstack[0] = 0;
 | 
				
			||||||
| 
						 | 
					@ -145,6 +146,17 @@ tok_new(void)
 | 
				
			||||||
	return tok;
 | 
						return tok;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static char *
 | 
				
			||||||
 | 
					new_string(const char *s, Py_ssize_t len)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						char* result = (char *)PyMem_MALLOC(len + 1);
 | 
				
			||||||
 | 
						if (result != NULL) {
 | 
				
			||||||
 | 
							memcpy(result, s, len);
 | 
				
			||||||
 | 
							result[len] = '\0';
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return result;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef PGEN
 | 
					#ifdef PGEN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *
 | 
					static char *
 | 
				
			||||||
| 
						 | 
					@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
 | 
				
			||||||
	return feof(tok->fp);
 | 
						return feof(tok->fp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const char *
 | 
					static char *
 | 
				
			||||||
decode_str(const char *str, struct tok_state *tok)
 | 
					decode_str(const char *str, int exec_input, struct tok_state *tok)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return str;
 | 
						return new_string(str, strlen(str));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else /* PGEN */
 | 
					#else /* PGEN */
 | 
				
			||||||
| 
						 | 
					@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
 | 
				
			||||||
	return NULL;		/* as if it were EOF */
 | 
						return NULL;		/* as if it were EOF */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *
 | 
					 | 
				
			||||||
new_string(const char *s, Py_ssize_t len)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	char* result = (char *)PyMem_MALLOC(len + 1);
 | 
					 | 
				
			||||||
	if (result != NULL) {
 | 
					 | 
				
			||||||
		memcpy(result, s, len);
 | 
					 | 
				
			||||||
		result[len] = '\0';
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return result;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char *
 | 
					static char *
 | 
				
			||||||
get_normal_name(char *s)	/* for utf-8 and latin-1 */
 | 
					get_normal_name(char *s)	/* for utf-8 and latin-1 */
 | 
				
			||||||
| 
						 | 
					@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
 | 
				
			||||||
	return utf8;
 | 
						return utf8;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static char *
 | 
				
			||||||
 | 
					translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
 | 
				
			||||||
 | 
						int skip_next_lf = 0, length = strlen(s), final_length;
 | 
				
			||||||
 | 
						char *buf, *current;
 | 
				
			||||||
 | 
						char c;
 | 
				
			||||||
 | 
						buf = PyMem_MALLOC(length + 2);
 | 
				
			||||||
 | 
						if (buf == NULL) {
 | 
				
			||||||
 | 
							tok->done = E_NOMEM;
 | 
				
			||||||
 | 
							return NULL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for (current = buf; (c = *s++);) {
 | 
				
			||||||
 | 
							if (skip_next_lf) {
 | 
				
			||||||
 | 
								skip_next_lf = 0;
 | 
				
			||||||
 | 
								if (c == '\n') {
 | 
				
			||||||
 | 
									c = *s;
 | 
				
			||||||
 | 
									s++;
 | 
				
			||||||
 | 
									if (!c)
 | 
				
			||||||
 | 
										break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							if (c == '\r') {
 | 
				
			||||||
 | 
								skip_next_lf = 1;
 | 
				
			||||||
 | 
								c = '\n';
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							*current = c;
 | 
				
			||||||
 | 
							current++;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						/* If this is exec input, add a newline to the end of the file if
 | 
				
			||||||
 | 
						   there isn't one already. */
 | 
				
			||||||
 | 
						if (exec_input && *current != '\n') {
 | 
				
			||||||
 | 
							*current = '\n';
 | 
				
			||||||
 | 
							current++;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						*current = '\0';
 | 
				
			||||||
 | 
						final_length = current - buf;
 | 
				
			||||||
 | 
						if (final_length < length && final_length)
 | 
				
			||||||
 | 
							/* should never fail */
 | 
				
			||||||
 | 
							buf = PyMem_REALLOC(buf, final_length + 1);
 | 
				
			||||||
 | 
						return buf;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Decode a byte string STR for use as the buffer of TOK.
 | 
					/* Decode a byte string STR for use as the buffer of TOK.
 | 
				
			||||||
   Look for encoding declarations inside STR, and record them
 | 
					   Look for encoding declarations inside STR, and record them
 | 
				
			||||||
   inside TOK.  */
 | 
					   inside TOK.  */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const char *
 | 
					static const char *
 | 
				
			||||||
decode_str(const char *str, struct tok_state *tok)
 | 
					decode_str(const char *input, int single, struct tok_state *tok)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	PyObject* utf8 = NULL;
 | 
						PyObject* utf8 = NULL;
 | 
				
			||||||
 | 
						const char *str;
 | 
				
			||||||
	const char *s;
 | 
						const char *s;
 | 
				
			||||||
	const char *newl[2] = {NULL, NULL};
 | 
						const char *newl[2] = {NULL, NULL};
 | 
				
			||||||
	int lineno = 0;
 | 
						int lineno = 0;
 | 
				
			||||||
 | 
						tok->input = str = translate_newlines(input, single, tok);
 | 
				
			||||||
 | 
						if (str == NULL)
 | 
				
			||||||
 | 
							return NULL;
 | 
				
			||||||
	tok->enc = NULL;
 | 
						tok->enc = NULL;
 | 
				
			||||||
	tok->str = str;
 | 
						tok->str = str;
 | 
				
			||||||
	if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
 | 
						if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
 | 
				
			||||||
| 
						 | 
					@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
 | 
				
			||||||
/* Set up tokenizer for string */
 | 
					/* Set up tokenizer for string */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct tok_state *
 | 
					struct tok_state *
 | 
				
			||||||
PyTokenizer_FromString(const char *str)
 | 
					PyTokenizer_FromString(const char *str, int exec_input)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct tok_state *tok = tok_new();
 | 
						struct tok_state *tok = tok_new();
 | 
				
			||||||
	if (tok == NULL)
 | 
						if (tok == NULL)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
	str = (char *)decode_str(str, tok);
 | 
						str = (char *)decode_str(str, exec_input, tok);
 | 
				
			||||||
	if (str == NULL) {
 | 
						if (str == NULL) {
 | 
				
			||||||
		PyTokenizer_Free(tok);
 | 
							PyTokenizer_Free(tok);
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
| 
						 | 
					@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct tok_state *
 | 
					struct tok_state *
 | 
				
			||||||
PyTokenizer_FromUTF8(const char *str)
 | 
					PyTokenizer_FromUTF8(const char *str, int exec_input)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct tok_state *tok = tok_new();
 | 
						struct tok_state *tok = tok_new();
 | 
				
			||||||
	if (tok == NULL)
 | 
						if (tok == NULL)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
 | 
					#ifndef PGEN
 | 
				
			||||||
 | 
						tok->input = str = translate_newlines(str, exec_input, tok);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						if (str == NULL) {
 | 
				
			||||||
 | 
							PyTokenizer_Free(tok);
 | 
				
			||||||
 | 
							return NULL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	tok->decoding_state = STATE_RAW;
 | 
						tok->decoding_state = STATE_RAW;
 | 
				
			||||||
	tok->read_coding_spec = 1;
 | 
						tok->read_coding_spec = 1;
 | 
				
			||||||
	tok->enc = NULL;
 | 
						tok->enc = NULL;
 | 
				
			||||||
| 
						 | 
					@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
 | 
				
			||||||
	return tok;
 | 
						return tok;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Set up tokenizer for file */
 | 
					/* Set up tokenizer for file */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct tok_state *
 | 
					struct tok_state *
 | 
				
			||||||
| 
						 | 
					@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	if (tok->fp != NULL && tok->buf != NULL)
 | 
						if (tok->fp != NULL && tok->buf != NULL)
 | 
				
			||||||
		PyMem_FREE(tok->buf);
 | 
							PyMem_FREE(tok->buf);
 | 
				
			||||||
 | 
						if (tok->input)
 | 
				
			||||||
 | 
							PyMem_FREE((char *)tok->input);
 | 
				
			||||||
	PyMem_FREE(tok);
 | 
						PyMem_FREE(tok);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,10 +58,11 @@ struct tok_state {
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	const char* enc;        /* Encoding for the current str. */
 | 
						const char* enc;        /* Encoding for the current str. */
 | 
				
			||||||
	const char* str;
 | 
						const char* str;
 | 
				
			||||||
 | 
						const char* input; /* Tokenizer's newline translated copy of the string. */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct tok_state *PyTokenizer_FromString(const char *);
 | 
					extern struct tok_state *PyTokenizer_FromString(const char *, int);
 | 
				
			||||||
extern struct tok_state *PyTokenizer_FromUTF8(const char *);
 | 
					extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
 | 
				
			||||||
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
 | 
					extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
 | 
				
			||||||
					      char *, char *);
 | 
										      char *, char *);
 | 
				
			||||||
extern void PyTokenizer_Free(struct tok_state *);
 | 
					extern void PyTokenizer_Free(struct tok_state *);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue