bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565)

The error messages in the parser module refer to nodes by their numeric IDs. To improve readability, use the node names when reporting errors.
2025-11-11 02:52:04 +00:00 · 2019-04-03 08:12:07 +03:00 · 2019-04-03 08:12:07 +03:00 · cb0748d393
commit cb0748d393
parent 76b387bf74
3 changed files with 29 additions and 6 deletions
--- a/Lib/test/test_parser.py
+++ b/Lib/test/test_parser.py
@ -749,6 +749,22 @@ def test_illegal_encoding(self):
        with self.assertRaises(UnicodeEncodeError):
            parser.sequence2st(tree)
    def test_invalid_node_id(self):
        """Feed trees containing bad node ids to check_bad_tree, one by one."""
        # Each entry pairs a malformed tree with the label that is handed
        # to check_bad_tree alongside it; order matches the original checks.
        cases = (
            ((257, (269, (-7, ''))), "negative node id"),
            ((257, (269, (99, ''))), "invalid token id"),
            ((257, (269, (9999, (0, '')))), "invalid symbol id"),
        )
        for bad_tree, label in cases:
            self.check_bad_tree(bad_tree, label)
    def test_ParserError_message(self):
        """ParserError text must mention node names rather than numeric ids."""
        # assertRaises makes the test fail when no ParserError is raised at
        # all; the previous bare try/except passed vacuously in that case.
        with self.assertRaises(parser.ParserError) as caught:
            parser.sequence2st((257,(269,(257,(0,'')))))
        message = str(caught.exception)
        self.assertIn("compound_stmt", message)  # Expected
        self.assertIn("file_input", message)     # Got
 class CompileTestCase(unittest.TestCase):
--- a/Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
+++ b/Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
@ -0,0 +1,2 @@
 Include node names in ``ParserError`` messages, instead of numeric IDs.
 Patch by A. Skrobov.
--- a/Modules/parsermodule.c
+++ b/Modules/parsermodule.c
@ -24,10 +24,6 @@
 *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
 *  look like "NOTE(...)".
 *
 *  To debug parser errors like
 *      "parser.ParserError: Expected node type 12, got 333."
 *  decode symbol numbers using the automatically-generated files
 *  Lib/symbol.h and Include/token.h.
 */
 #include "Python.h"                     /* general Python API             */
@ -666,6 +662,13 @@ validate_node(node *tree)
    for (pos = 0; pos < nch; ++pos) {
        node *ch = CHILD(tree, pos);
        int ch_type = TYPE(ch);
        if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
            || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
            || (ch_type < 0)
           ) {
            PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
            return 0;
        }
        if (ch_type == suite && TYPE(tree) == funcdef) {
            /* This is the opposite hack of what we do in parser.c
               (search for func_body_suite), except we don't ever
@ -700,8 +703,10 @@ validate_node(node *tree)
            const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
            if (ISNONTERMINAL(next_type)) {
-                PyErr_Format(parser_error, "Expected node type %d, got %d.",
+                PyErr_Format(parser_error, "Expected %s, got %s.",
-                             next_type, ch_type);
+                             _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
                             ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
                             _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
            }
            else if (expected_str != NULL) {
                PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",
@ -0,0 +1,2 @@
 Include node names in ``ParserError`` messages, instead of numeric IDs.
 Patch by A. Skrobov.