diff --git a/Lib/test/bad_coding2.py b/Lib/test/bad_coding2.py new file mode 100644 index 00000000000..604b122405e --- /dev/null +++ b/Lib/test/bad_coding2.py @@ -0,0 +1,2 @@ +#coding: utf8 +print '我' \ No newline at end of file diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py index aa7241d9470..e83015e5431 100644 --- a/Lib/test/test_coding.py +++ b/Lib/test/test_coding.py @@ -5,6 +5,13 @@ class CodingTest(unittest.TestCase): def test_bad_coding(self): module_name = 'bad_coding' + self.verify_bad_module(module_name) + + def test_bad_coding2(self): + module_name = 'bad_coding2' + self.verify_bad_module(module_name) + + def verify_bad_module(self, module_name): self.assertRaises(SyntaxError, __import__, 'test.' + module_name) path = os.path.dirname(__file__) diff --git a/Misc/NEWS b/Misc/NEWS index 1db35f44b90..7b27e49cb00 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.5 alpha 1? Core and builtins ----------------- +- Bug #1378022, UTF-8 files with a leading BOM crashed the interpreter. + - Support for converting hex strings to floats no longer works. This was not portable. float('0x3') now raises a ValueError. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 37e6c3349ba..a79ea811294 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -292,6 +292,12 @@ check_coding_spec(const char* line, int size, struct tok_state *tok, PyMem_DEL(cs); } } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } return r; } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index a7f4fe7ceed..30cb518a65d 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1439,8 +1439,8 @@ err_input(perrdetail *err) } if (msg == NULL) msg = "unknown decode error"; - Py_DECREF(type); - Py_DECREF(value); + Py_XDECREF(type); + Py_XDECREF(value); Py_XDECREF(tb); break; }