#include "Python.h"
#include "errcode.h"
#include "../Parser/tokenizer.h"
#include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()

static struct PyModuleDef _tokenizemodule;

typedef struct {
    PyTypeObject *TokenizerIter;
} tokenize_state;

static tokenize_state *
get_tokenize_state(PyObject *module) {
    return (tokenize_state *)PyModule_GetState(module);
}

#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))

#include "pycore_runtime.h"
#include "clinic/Python-tokenize.c.h"

/*[clinic input]
module _tokenizer
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/

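/* Per-instance state for TokenizerIter: the underlying C tokenizer ('tok')
   plus a 'done' flag that is set once ENDMARKER has been emitted, so the
   next call raises StopIteration. */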
typedef struct
{
    PyObject_HEAD struct tok_state *tok;
    int done;
} tokenizeriterobject;

/*[clinic input]
@classmethod
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new

    readline: object
    /
    *
    extra_tokens: bool
    encoding: str(c_default="NULL") = 'utf-8'
[clinic start generated code]*/

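/* __new__ implementation: wrap a 'readline' callable (one source line per
   call) in a C tokenizer.  'extra_tokens' toggles the tokenizer's
   tok_extra_tokens compatibility mode. */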
static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
                       int extra_tokens, const char *encoding)
/*[clinic end generated code: output=7501a1211683ce16 input=f7dddf8a613ae8bd]*/
{
    tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }
    PyObject *filename = PyUnicode_FromString("<string>");
    if (filename == NULL) {
        return NULL;
    }
    self->tok = _PyTokenizer_FromReadline(readline, encoding, 1, 1);
    if (self->tok == NULL) {
        Py_DECREF(filename);
        return NULL;
    }
    self->tok->filename = filename;
    if (extra_tokens) {
        self->tok->tok_extra_tokens = 1;
    }
    self->done = 0;
    return (PyObject *)self;
}

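/* Translate the tokenizer's error state (tok->done) into a Python
   exception: SyntaxError by default, IndentationError for dedent/depth
   problems, TabError for inconsistent tabs, and so on.  The exception
   carries the offending line and column offset when available. */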
static int
_tokenizer_error(struct tok_state *tok)
{
    if (PyErr_Occurred()) {
        return -1;
    }

    const char *msg = NULL;
    PyObject* errtype = PyExc_SyntaxError;
    switch (tok->done) {
        case E_TOKEN:
            msg = "invalid token";
            break;
        case E_EOF:
            PyErr_SetString(PyExc_SyntaxError, "unexpected EOF in multi-line statement");
            PyErr_SyntaxLocationObject(tok->filename, tok->lineno,
                                       tok->inp - tok->buf < 0 ? 0 : (int)(tok->inp - tok->buf));
            return -1;
        case E_DEDENT:
            msg = "unindent does not match any outer indentation level";
            errtype = PyExc_IndentationError;
            break;
        case E_INTR:
            if (!PyErr_Occurred()) {
                PyErr_SetNone(PyExc_KeyboardInterrupt);
            }
            return -1;
        case E_NOMEM:
            PyErr_NoMemory();
            return -1;
        case E_TABSPACE:
            errtype = PyExc_TabError;
            msg = "inconsistent use of tabs and spaces in indentation";
            break;
        case E_TOODEEP:
            errtype = PyExc_IndentationError;
            msg = "too many levels of indentation";
            break;
        case E_LINECONT: {
            msg = "unexpected character after line continuation character";
            break;
        }
        default:
            msg = "unknown tokenization error";
    }

    PyObject* errstr = NULL;
    PyObject* error_line = NULL;
    PyObject* tmp = NULL;
    PyObject* value = NULL;
    int result = 0;

    Py_ssize_t size = tok->inp - tok->buf;
    assert(tok->buf[size-1] == '\n');
    size -= 1; // Remove the newline character from the end of the line
    error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
    if (!error_line) {
        result = -1;
        goto exit;
    }

    Py_ssize_t offset = _PyPegen_byte_offset_to_character_offset(error_line, tok->inp - tok->buf);
    if (offset == -1) {
        result = -1;
        goto exit;
    }
    tmp = Py_BuildValue("(OnnOOO)", tok->filename, tok->lineno, offset, error_line, Py_None, Py_None);
    if (!tmp) {
        result = -1;
        goto exit;
    }

    errstr = PyUnicode_FromString(msg);
    if (!errstr) {
        result = -1;
        goto exit;
    }

    value = PyTuple_Pack(2, errstr, tmp);
    if (!value) {
        result = -1;
        goto exit;
    }

    PyErr_SetObject(errtype, value);

exit:
    Py_XDECREF(errstr);
    Py_XDECREF(error_line);
    Py_XDECREF(tmp);
    Py_XDECREF(value);
    return result;
}

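/* tp_iternext: fetch one token from the C tokenizer and return it as a
   (type, string, (start_lineno, start_col), (end_lineno, end_col), line)
   tuple.  Raises StopIteration once the token stream is exhausted. */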
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    PyObject* result = NULL;
    struct token token;
    _PyToken_Init(&token);

    int type = _PyTokenizer_Get(it->tok, &token);
    if (type == ERRORTOKEN) {
        if (!PyErr_Occurred()) {
            _tokenizer_error(it->tok);
            assert(PyErr_Occurred());
        }
        goto exit;
    }
    if (it->done || type == ERRORTOKEN) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        it->done = 1;
        goto exit;
    }
    PyObject *str = NULL;
    if (token.start == NULL || token.end == NULL) {
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(token.start, token.end - token.start);
    }
    if (str == NULL) {
        goto exit;
    }

    int is_trailing_token = 0;
    if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
        is_trailing_token = 1;
    }

    const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
    PyObject* line = NULL;
    if (it->tok->tok_extra_tokens && is_trailing_token) {
        line = PyUnicode_FromString("");
    } else {
        Py_ssize_t size = it->tok->inp - line_start;
        if (size >= 1 && it->tok->implicit_newline) {
            size -= 1;
        }
        line = PyUnicode_DecodeUTF8(line_start, size, "replace");
    }
    if (line == NULL) {
        Py_DECREF(str);
        goto exit;
    }

    Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
    Py_ssize_t end_lineno = it->tok->lineno;
    Py_ssize_t col_offset = -1;
    Py_ssize_t end_col_offset = -1;
    if (token.start != NULL && token.start >= line_start) {
        col_offset = _PyPegen_byte_offset_to_character_offset(line, token.start - line_start);
    }
    if (token.end != NULL && token.end >= it->tok->line_start) {
        end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
    }

    if (it->tok->tok_extra_tokens) {
        if (is_trailing_token) {
            lineno = end_lineno = lineno + 1;
            col_offset = end_col_offset = 0;
        }
        // Necessary adjustments to match the original Python tokenize
        // implementation
        if (type > DEDENT && type < OP) {
            type = OP;
        }
        else if (type == ASYNC || type == AWAIT) {
            type = NAME;
        }
        else if (type == NEWLINE) {
            Py_DECREF(str);
            if (!it->tok->implicit_newline) {
                if (it->tok->start[0] == '\r') {
                    str = PyUnicode_FromString("\r\n");
                } else {
                    str = PyUnicode_FromString("\n");
                }
            }
            else {
                // The implicit newline added at EOF has no source text,
                // so don't reuse the released token string.
                str = PyUnicode_FromString("");
            }
            end_col_offset++;
        }
        else if (type == NL) {
            if (it->tok->implicit_newline) {
                Py_DECREF(str);
                str = PyUnicode_FromString("");
            }
        }

        if (str == NULL) {
            Py_DECREF(line);
            goto exit;
        }
    }

    result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
exit:
    _PyToken_Free(&token);
    if (type == ENDMARKER) {
        it->done = 1;
    }
    return result;
}

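/* Free the C tokenizer together with the iterator; the type is a heap
   type, so drop its reference after tp_free. */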
static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    PyTypeObject *tp = Py_TYPE(it);
    _PyTokenizer_Free(it->tok);
    tp->tp_free(it);
    Py_DECREF(tp);
}

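/* TokenizerIter is an iterator over itself: tp_iter returns self and
   tp_iternext produces one token tuple per call. */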
static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};

static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};

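/* Module exec slot: create the TokenizerIter heap type from its spec and
   add it to the module. */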
static int
tokenizemodule_exec(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    if (state == NULL) {
        return -1;
    }

    state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
    if (state->TokenizerIter == NULL) {
        return -1;
    }
    if (PyModule_AddType(m, state->TokenizerIter) < 0) {
        return -1;
    }

    return 0;
}

static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}
};

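/* GC support for the per-module state: the only held reference is the
   TokenizerIter type object. */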
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}

static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}

static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}

static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};

PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}