| 
									
										
										
										
											2020-07-16 06:07:29 -07:00
										 |  |  | #include <stdbool.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | #include <Python.h>
 | 
					
						
							| 
									
										
										
										
											2023-07-23 22:10:12 +02:00
										 |  |  | #include "pycore_bytesobject.h"   // _PyBytes_DecodeEscape()
 | 
					
						
							| 
									
										
										
										
											2023-07-04 09:29:52 +02:00
										 |  |  | #include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-11 17:14:44 +02:00
										 |  |  | #include "lexer/state.h"
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | #include "pegen.h"
 | 
					
						
							| 
									
										
										
										
											2020-06-11 17:30:46 +01:00
										 |  |  | #include "string_parser.h"
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | //// STRING HANDLING FUNCTIONS ////
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  | warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2023-10-27 12:19:34 +09:00
										 |  |  |     if (p->call_invalid_rules) { | 
					
						
							|  |  |  |         // Do not report warnings if we are in the second pass of the parser
 | 
					
						
							|  |  |  |         // to avoid showing the warning twice.
 | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |     unsigned char c = (unsigned char)*first_invalid_escape; | 
					
						
							| 
									
										
										
										
											2023-10-12 09:34:35 +02:00
										 |  |  |     if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) { | 
					
						
							|  |  |  |         // in this case the tokenizer has already emitted a warning,
 | 
					
						
							|  |  |  |         // see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
 | 
					
						
							| 
									
										
										
										
											2023-06-20 14:38:46 +02:00
										 |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 |  |  |     int octal = ('4' <= c && c <= '7'); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     PyObject *msg = | 
					
						
							| 
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 |  |  |         octal | 
					
						
							|  |  |  |         ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", | 
					
						
							|  |  |  |                                first_invalid_escape) | 
					
						
							|  |  |  |         : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     if (msg == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 |  |  |     PyObject *category; | 
					
						
							|  |  |  |     if (p->feature_version >= 12) { | 
					
						
							|  |  |  |         category = PyExc_SyntaxWarning; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         category = PyExc_DeprecationWarning; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     // Calculate the lineno and the col_offset of the invalid escape sequence
 | 
					
						
							|  |  |  |     const char *start = buffer; | 
					
						
							|  |  |  |     const char *end = first_invalid_escape; | 
					
						
							|  |  |  |     int lineno = t->lineno; | 
					
						
							|  |  |  |     int col_offset = t->col_offset; | 
					
						
							|  |  |  |     while (start < end) { | 
					
						
							|  |  |  |         if (*start == '\n') { | 
					
						
							|  |  |  |             lineno++; | 
					
						
							|  |  |  |             col_offset = 0; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             col_offset++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         start++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Count the number of quotes in the token
 | 
					
						
							|  |  |  |     char first_quote = 0; | 
					
						
							|  |  |  |     if (lineno == t->lineno) { | 
					
						
							|  |  |  |         int quote_count = 0; | 
					
						
							|  |  |  |         char* tok = PyBytes_AsString(t->bytes); | 
					
						
							|  |  |  |         for (int i = 0; i < PyBytes_Size(t->bytes); i++) { | 
					
						
							|  |  |  |             if (tok[i] == '\'' || tok[i] == '\"') { | 
					
						
							|  |  |  |                 if (quote_count == 0) { | 
					
						
							|  |  |  |                     first_quote = tok[i]; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 if (tok[i] == first_quote) { | 
					
						
							|  |  |  |                     quote_count++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         col_offset += quote_count; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 |  |  |     if (PyErr_WarnExplicitObject(category, msg, p->tok->filename, | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  |                                  lineno, NULL, NULL) < 0) { | 
					
						
							| 
									
										
										
										
											2022-11-03 17:53:25 +01:00
										 |  |  |         if (PyErr_ExceptionMatches(category)) { | 
					
						
							| 
									
										
										
										
											2023-06-20 14:38:46 +02:00
										 |  |  |             /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |                to get a more accurate error report */ | 
					
						
							|  |  |  |             PyErr_Clear(); | 
					
						
							| 
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  |             /* This is needed, in order for the SyntaxError to point to the token t,
 | 
					
						
							|  |  |  |                since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the | 
					
						
							|  |  |  |                error location, if p->known_err_token is not set. */ | 
					
						
							|  |  |  |             p->known_err_token = t; | 
					
						
							| 
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 |  |  |             if (octal) { | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  |                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | 
					
						
							|  |  |  |                 "invalid octal escape sequence '\\%.3s'", first_invalid_escape); | 
					
						
							| 
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  |                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | 
					
						
							|  |  |  |                 "invalid escape sequence '\\%c'", c); | 
					
						
							| 
									
										
										
										
											2022-04-30 13:16:27 +03:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |         Py_DECREF(msg); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     Py_DECREF(msg); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | decode_utf8(const char **sPtr, const char *end) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |     const char *s; | 
					
						
							|  |  |  |     const char *t; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     t = s = *sPtr; | 
					
						
							|  |  |  |     while (s < end && (*s & 0x80)) { | 
					
						
							|  |  |  |         s++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     *sPtr = s; | 
					
						
							|  |  |  |     return PyUnicode_DecodeUTF8(t, s - t, NULL); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							| 
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 |  |  | decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |     PyObject *v; | 
					
						
							|  |  |  |     PyObject *u; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     char *buf; | 
					
						
							|  |  |  |     char *p; | 
					
						
							|  |  |  |     const char *end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* check for integer overflow */ | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |     if (len > (size_t)PY_SSIZE_T_MAX / 6) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
 | 
					
						
							|  |  |  |        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |     u = PyBytes_FromStringAndSize((char *)NULL, (Py_ssize_t)len * 6); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     if (u == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     p = buf = PyBytes_AsString(u); | 
					
						
							| 
									
										
										
										
											2020-11-18 16:38:53 +01:00
										 |  |  |     if (p == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     end = s + len; | 
					
						
							|  |  |  |     while (s < end) { | 
					
						
							|  |  |  |         if (*s == '\\') { | 
					
						
							|  |  |  |             *p++ = *s++; | 
					
						
							|  |  |  |             if (s >= end || *s & 0x80) { | 
					
						
							|  |  |  |                 strcpy(p, "u005c"); | 
					
						
							|  |  |  |                 p += 5; | 
					
						
							|  |  |  |                 if (s >= end) { | 
					
						
							|  |  |  |                     break; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (*s & 0x80) { | 
					
						
							|  |  |  |             PyObject *w; | 
					
						
							|  |  |  |             int kind; | 
					
						
							| 
									
										
										
										
											2021-06-12 16:11:59 +03:00
										 |  |  |             const void *data; | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |             Py_ssize_t w_len; | 
					
						
							|  |  |  |             Py_ssize_t i; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             w = decode_utf8(&s, end); | 
					
						
							|  |  |  |             if (w == NULL) { | 
					
						
							|  |  |  |                 Py_DECREF(u); | 
					
						
							|  |  |  |                 return NULL; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             kind = PyUnicode_KIND(w); | 
					
						
							|  |  |  |             data = PyUnicode_DATA(w); | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |             w_len = PyUnicode_GET_LENGTH(w); | 
					
						
							|  |  |  |             for (i = 0; i < w_len; i++) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |                 Py_UCS4 chr = PyUnicode_READ(kind, data, i); | 
					
						
							|  |  |  |                 sprintf(p, "\\U%08x", chr); | 
					
						
							|  |  |  |                 p += 10; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             /* Should be impossible to overflow */ | 
					
						
							|  |  |  |             assert(p - buf <= PyBytes_GET_SIZE(u)); | 
					
						
							|  |  |  |             Py_DECREF(w); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             *p++ = *s++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |     len = (size_t)(p - buf); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     s = buf; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const char *first_invalid_escape; | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |     v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     // HACK: later we can simply pass the line no, since we don't preserve the tokens
 | 
					
						
							|  |  |  |     // when we are decoding the string but we preserve the line numbers.
 | 
					
						
							|  |  |  |     if (v != NULL && first_invalid_escape != NULL && t != NULL) { | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  |         if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             /* We have not decref u before because first_invalid_escape points
 | 
					
						
							|  |  |  |                inside u. */ | 
					
						
							|  |  |  |             Py_XDECREF(u); | 
					
						
							|  |  |  |             Py_DECREF(v); | 
					
						
							|  |  |  |             return NULL; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     Py_XDECREF(u); | 
					
						
							|  |  |  |     return v; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							| 
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 |  |  | decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | { | 
					
						
							|  |  |  |     const char *first_invalid_escape; | 
					
						
							|  |  |  |     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); | 
					
						
							|  |  |  |     if (result == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (first_invalid_escape != NULL) { | 
					
						
							| 
									
										
										
										
											2025-02-13 01:49:25 +00:00
										 |  |  |         if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             Py_DECREF(result); | 
					
						
							|  |  |  |             return NULL; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  | PyObject * | 
					
						
							|  |  |  | _PyPegen_decode_string(Parser *p, int raw, const char *s, size_t len, Token *t) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (raw) { | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |         return PyUnicode_DecodeUTF8Stateful(s, (Py_ssize_t)len, NULL, NULL); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     return decode_unicode_with_escapes(p, s, len, t); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* s must include the bracketing quote characters, and r, b &/or f prefixes
 | 
					
						
							|  |  |  |     (if any), and embedded escape sequences (if any). (f-strings are handled by the parser) | 
					
						
							|  |  |  |    _PyPegen_parse_string parses it, and returns the decoded Python string object. */ | 
					
						
							|  |  |  | PyObject * | 
					
						
							|  |  |  | _PyPegen_parse_string(Parser *p, Token *t) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 |  |  |     const char *s = PyBytes_AsString(t->bytes); | 
					
						
							|  |  |  |     if (s == NULL) { | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         return NULL; | 
					
						
							| 
									
										
										
										
											2020-05-07 13:37:51 +03:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     size_t len; | 
					
						
							|  |  |  |     int quote = Py_CHARMASK(*s); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     int bytesmode = 0; | 
					
						
							|  |  |  |     int rawmode = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     if (Py_ISALPHA(quote)) { | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         while (!bytesmode || !rawmode) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             if (quote == 'b' || quote == 'B') { | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |                 quote =(unsigned char)*++s; | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |                 bytesmode = 1; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |             else if (quote == 'u' || quote == 'U') { | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |                 quote = (unsigned char)*++s; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |             else if (quote == 'r' || quote == 'R') { | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |                 quote = (unsigned char)*++s; | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |                 rawmode = 1; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (quote != '\'' && quote != '\"') { | 
					
						
							|  |  |  |         PyErr_BadInternalCall(); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         return NULL; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-06-25 19:40:05 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     /* Skip the leading quote char. */ | 
					
						
							|  |  |  |     s++; | 
					
						
							|  |  |  |     len = strlen(s); | 
					
						
							| 
									
										
										
										
											2024-06-25 19:40:05 +02:00
										 |  |  |     // gh-120155: 's' contains at least the trailing quote,
 | 
					
						
							|  |  |  |     // so the code '--len' below is safe.
 | 
					
						
							|  |  |  |     assert(len >= 1); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     if (len > INT_MAX) { | 
					
						
							|  |  |  |         PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         return NULL; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     if (s[--len] != quote) { | 
					
						
							|  |  |  |         /* Last quote char must match the first. */ | 
					
						
							|  |  |  |         PyErr_BadInternalCall(); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         return NULL; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     if (len >= 4 && s[0] == quote && s[1] == quote) { | 
					
						
							|  |  |  |         /* A triple quoted string. We've already skipped one quote at
 | 
					
						
							|  |  |  |            the start and one at the end of the string. Now skip the | 
					
						
							|  |  |  |            two at the start. */ | 
					
						
							|  |  |  |         s += 2; | 
					
						
							|  |  |  |         len -= 2; | 
					
						
							|  |  |  |         /* And check that the last two match. */ | 
					
						
							|  |  |  |         if (s[--len] != quote || s[--len] != quote) { | 
					
						
							|  |  |  |             PyErr_BadInternalCall(); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |             return NULL; | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Avoid invoking escape decoding routines if possible. */ | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     rawmode = rawmode || strchr(s, '\\') == NULL; | 
					
						
							|  |  |  |     if (bytesmode) { | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         /* Disallow non-ASCII characters. */ | 
					
						
							|  |  |  |         const char *ch; | 
					
						
							|  |  |  |         for (ch = s; *ch; ch++) { | 
					
						
							|  |  |  |             if (Py_CHARMASK(*ch) >= 0x80) { | 
					
						
							| 
									
										
										
										
											2023-04-23 03:08:27 +03:00
										 |  |  |                 RAISE_SYNTAX_ERROR_KNOWN_LOCATION( | 
					
						
							|  |  |  |                                    t, | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |                                    "bytes can only contain ASCII " | 
					
						
							| 
									
										
										
										
											2021-01-24 09:56:57 +11:00
										 |  |  |                                    "literal characters"); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |                 return NULL; | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |         if (rawmode) { | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |             return PyBytes_FromStringAndSize(s, (Py_ssize_t)len); | 
					
						
							| 
									
										
										
										
											2020-06-15 14:23:43 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2024-09-30 03:13:13 +02:00
										 |  |  |         return decode_bytes_with_escapes(p, s, (Py_ssize_t)len, t); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     return _PyPegen_decode_string(p, rawmode, s, len, t); | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | } |