2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <Python.h>
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-23 22:10:12 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include "pycore_bytesobject.h"   // _PyBytes_DecodeEscape()
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-04 09:29:52 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-11 17:14:44 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include "lexer/state.h"
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include "pegen.h"
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-11 17:30:46 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include "string_parser.h"
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2025-03-02 10:56:49 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <stdbool.h>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								//// STRING HANDLING FUNCTIONS ////
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static int
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-27 12:19:34 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if (p->call_invalid_rules) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // Do not report warnings if we are in the second pass of the parser
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // to avoid showing the warning twice.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    unsigned char c = (unsigned char)*first_invalid_escape;
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-12 09:34:35 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // in this case the tokenizer has already emitted a warning,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
							 | 
						
					
						
							
								
									
										
										
										
											2023-06-20 14:38:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    int octal = ('4' <= c && c <= '7');
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    PyObject *msg =
							 | 
						
					
						
							
								
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        octal
							 | 
						
					
						
							
								
									
										
										
										
											2025-01-16 04:00:54 +11:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        ? PyUnicode_FromFormat(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "\"\\%.3s\" is an invalid octal escape sequence. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "Such sequences will not work in the future. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "Did you mean \"\\\\%.3s\"? A raw string is also an option.",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              first_invalid_escape, first_invalid_escape)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        : PyUnicode_FromFormat(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "\"\\%c\" is an invalid escape sequence. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "Such sequences will not work in the future. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              "Did you mean \"\\\\%c\"? A raw string is also an option.",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              c, c);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (msg == NULL) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return -1;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    PyObject *category;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (p->feature_version >= 12) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category = PyExc_SyntaxWarning;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        category = PyExc_DeprecationWarning;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // Calculate the lineno and the col_offset of the invalid escape sequence
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *start = buffer;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *end = first_invalid_escape;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    int lineno = t->lineno;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    int col_offset = t->col_offset;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    while (start < end) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (*start == '\n') {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            lineno++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            col_offset = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            col_offset++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        start++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // Count the number of quotes in the token
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    char first_quote = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (lineno == t->lineno) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        int quote_count = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        char* tok = PyBytes_AsString(t->bytes);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (tok[i] == '\'' || tok[i] == '\"') {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if (quote_count == 0) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    first_quote = tok[i];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if (tok[i] == first_quote) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    quote_count++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            } else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        col_offset += quote_count;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                                 lineno, NULL, NULL) < 0) {
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if (PyErr_ExceptionMatches(category)) {
							 | 
						
					
						
							
								
									
										
										
										
											2023-06-20 14:38:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								               to get a more accurate error report */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            PyErr_Clear();
							 | 
						
					
						
							
								
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            /* This is needed, in order for the SyntaxError to point to the token t,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								               since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								               error location, if p->known_err_token is not set. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            p->known_err_token = t;
							 | 
						
					
						
							
								
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if (octal) {
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
							 | 
						
					
						
							
								
									
										
										
										
											2025-01-16 04:00:54 +11:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    "\"\\%.3s\" is an invalid octal escape sequence. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "Did you mean \"\\\\%.3s\"? A raw string is also an option.",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    first_invalid_escape, first_invalid_escape);
							 | 
						
					
						
							
								
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else {
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
							 | 
						
					
						
							
								
									
										
										
										
											2025-01-16 04:00:54 +11:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    "\"\\%c\" is an invalid escape sequence. "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    "Did you mean \"\\\\%c\"? A raw string is also an option.",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    c, c);
							 | 
						
					
						
							
								
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Py_DECREF(msg);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return -1;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Py_DECREF(msg);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static PyObject *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								decode_utf8(const char **sPtr, const char *end)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    const char *s;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *t;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    t = s = *sPtr;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    while (s < end && (*s & 0x80)) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        s++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    *sPtr = s;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return PyUnicode_DecodeUTF8(t, s - t, NULL);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static PyObject *
							 | 
						
					
						
							
								
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    PyObject *v;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    PyObject *u;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    char *buf;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    char *p;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *end;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* check for integer overflow */
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if (len > (size_t)PY_SSIZE_T_MAX / 6) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    u = PyBytes_FromStringAndSize((char *)NULL, (Py_ssize_t)len * 6);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (u == NULL) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    p = buf = PyBytes_AsString(u);
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-18 16:38:53 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if (p == NULL) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end = s + len;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    while (s < end) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (*s == '\\') {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            *p++ = *s++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (s >= end || *s & 0x80) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                strcpy(p, "u005c");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                p += 5;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if (s >= end) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    break;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (*s & 0x80) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            PyObject *w;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            int kind;
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-12 16:11:59 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            const void *data;
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            Py_ssize_t w_len;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Py_ssize_t i;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            w = decode_utf8(&s, end);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (w == NULL) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                Py_DECREF(u);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            kind = PyUnicode_KIND(w);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            data = PyUnicode_DATA(w);
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            w_len = PyUnicode_GET_LENGTH(w);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            for (i = 0; i < w_len; i++) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                Py_UCS4 chr = PyUnicode_READ(kind, data, i);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                sprintf(p, "\\U%08x", chr);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                p += 10;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            /* Should be impossible to overflow */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            assert(p - buf <= PyBytes_GET_SIZE(u));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Py_DECREF(w);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            *p++ = *s++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    len = (size_t)(p - buf);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    s = buf;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *first_invalid_escape;
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // HACK: later we can simply pass the line no, since we don't preserve the tokens
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // when we are decoding the string but we preserve the line numbers.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (v != NULL && first_invalid_escape != NULL && t != NULL) {
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            /* We have not decref u before because first_invalid_escape points
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								               inside u. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Py_XDECREF(u);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Py_DECREF(v);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Py_XDECREF(u);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return v;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static PyObject *
							 | 
						
					
						
							
								
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const char *first_invalid_escape;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (result == NULL) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (first_invalid_escape != NULL) {
							 | 
						
					
						
							
								
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Py_DECREF(result);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return result;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								PyObject *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								_PyPegen_decode_string(Parser *p, int raw, const char *s, size_t len, Token *t)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (raw) {
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return PyUnicode_DecodeUTF8Stateful(s, (Py_ssize_t)len, NULL, NULL);
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return decode_unicode_with_escapes(p, s, len, t);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/* s must include the bracketing quote characters, and r, b &/or f prefixes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    (if any), and embedded escape sequences (if any). (f-strings are handled by the parser)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   _PyPegen_parse_string parses it, and returns the decoded Python string object. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								PyObject *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								_PyPegen_parse_string(Parser *p, Token *t)
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    const char *s = PyBytes_AsString(t->bytes);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (s == NULL) {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    size_t len;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    int quote = Py_CHARMASK(*s);
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    int bytesmode = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    int rawmode = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (Py_ISALPHA(quote)) {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        while (!bytesmode || !rawmode) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (quote == 'b' || quote == 'B') {
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                quote =(unsigned char)*++s;
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                bytesmode = 1;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else if (quote == 'u' || quote == 'U') {
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                quote = (unsigned char)*++s;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else if (quote == 'r' || quote == 'R') {
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                quote = (unsigned char)*++s;
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                rawmode = 1;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (quote != '\'' && quote != '\"') {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        PyErr_BadInternalCall();
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2024-06-25 18:53:24 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* Skip the leading quote char. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    s++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    len = strlen(s);
							 | 
						
					
						
							
								
									
										
										
										
											2024-06-25 18:53:24 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // gh-120155: 's' contains at least the trailing quote,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // so the code '--len' below is safe.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    assert(len >= 1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (len > INT_MAX) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (s[--len] != quote) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        /* Last quote char must match the first. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        PyErr_BadInternalCall();
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (len >= 4 && s[0] == quote && s[1] == quote) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        /* A triple quoted string. We've already skipped one quote at
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           the start and one at the end of the string. Now skip the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           two at the start. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        s += 2;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        len -= 2;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        /* And check that the last two match. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (s[--len] != quote || s[--len] != quote) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            PyErr_BadInternalCall();
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* Avoid invoking escape decoding routines if possible. */
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    rawmode = rawmode || strchr(s, '\\') == NULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (bytesmode) {
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        /* Disallow non-ASCII characters. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        const char *ch;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for (ch = s; *ch; ch++) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (Py_CHARMASK(*ch) >= 0x80) {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-23 03:08:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                   t,
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                   "bytes can only contain ASCII "
							 | 
						
					
						
							
								
									
										
										
										
											2021-01-24 09:56:57 +11:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                                   "literal characters");
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                return NULL;
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if (rawmode) {
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return PyBytes_FromStringAndSize(s, (Py_ssize_t)len);
							 | 
						
					
						
							
								
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-18 19:10:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return decode_bytes_with_escapes(p, s, (Py_ssize_t)len, t);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return _PyPegen_decode_string(p, rawmode, s, len, t);
							 | 
						
					
						
							
								
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |