| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | #include "Python.h"
 | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  | #include "errcode.h"
 | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | #include "../Parser/tokenizer.h"
 | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  | #include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 | 
					
						
							|  |  |  | #include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | 
 | 
					
						
// Forward declaration so the state-lookup macro below can reference the module.
static struct PyModuleDef _tokenizemodule;
					
						
							|  |  |  | 
 | 
					
						
// Per-module (per-interpreter) state for the _tokenize extension module.
typedef struct {
    PyTypeObject *TokenizerIter;  // heap type for _tokenize.TokenizerIter (strong ref)
} tokenize_state;
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | static tokenize_state * | 
					
						
							|  |  |  | get_tokenize_state(PyObject *module) { | 
					
						
							|  |  |  |     return (tokenize_state *)PyModule_GetState(module); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
// Resolve the tokenize_state that owns a given heap type by locating the
// module instance created from _tokenizemodule (works across interpreters).
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-11 15:25:49 -06:00
										 |  |  | #include "pycore_runtime.h"
 | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | #include "clinic/Python-tokenize.c.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*[clinic input]
 | 
					
						
							|  |  |  | module _tokenizer | 
					
						
							|  |  |  | class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter" | 
					
						
							|  |  |  | [clinic start generated code]*/ | 
					
						
							|  |  |  | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
// Instance layout for _tokenizer.tokenizeriter: wraps the C tokenizer state.
typedef struct
{
    PyObject_HEAD struct tok_state *tok;  // owned; released in tokenizeriter_dealloc
} tokenizeriterobject;
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*[clinic input]
 | 
					
						
							|  |  |  | @classmethod | 
					
						
							|  |  |  | _tokenizer.tokenizeriter.__new__ as tokenizeriter_new | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     source: str | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     * | 
					
						
							|  |  |  |     extra_tokens: bool | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | [clinic start generated code]*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  | tokenizeriter_new_impl(PyTypeObject *type, const char *source, | 
					
						
							|  |  |  |                        int extra_tokens) | 
					
						
							|  |  |  | /*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/ | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (self == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *filename = PyUnicode_FromString("<string>"); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (filename == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-10-13 17:22:14 +02:00
										 |  |  |     self->tok = _PyTokenizer_FromUTF8(source, 1); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (self->tok == NULL) { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |         Py_DECREF(filename); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     self->tok->filename = filename; | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     if (extra_tokens) { | 
					
						
							|  |  |  |         self->tok->tok_extra_tokens = 1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     return (PyObject *)self; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  | static int | 
					
						
							|  |  |  | _tokenizer_error(struct tok_state *tok) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (PyErr_Occurred()) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const char *msg = NULL; | 
					
						
							|  |  |  |     PyObject* errtype = PyExc_SyntaxError; | 
					
						
							|  |  |  |     switch (tok->done) { | 
					
						
							|  |  |  |         case E_TOKEN: | 
					
						
							|  |  |  |             msg = "invalid token"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_EOF: | 
					
						
							|  |  |  |             if (tok->level) { | 
					
						
							|  |  |  |                     PyErr_Format(PyExc_SyntaxError, | 
					
						
							|  |  |  |                                  "parenthesis '%c' was never closed", | 
					
						
							|  |  |  |                                 tok->parenstack[tok->level-1]); | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 PyErr_SetString(PyExc_SyntaxError, "unexpected EOF while parsing"); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_DEDENT: | 
					
						
							| 
									
										
										
										
											2023-05-22 13:30:18 +02:00
										 |  |  |             msg = "unindent does not match any outer indentation level"; | 
					
						
							|  |  |  |             errtype = PyExc_IndentationError; | 
					
						
							|  |  |  |             break; | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         case E_INTR: | 
					
						
							|  |  |  |             if (!PyErr_Occurred()) { | 
					
						
							|  |  |  |                 PyErr_SetNone(PyExc_KeyboardInterrupt); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_NOMEM: | 
					
						
							|  |  |  |             PyErr_NoMemory(); | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_TABSPACE: | 
					
						
							|  |  |  |             errtype = PyExc_TabError; | 
					
						
							|  |  |  |             msg = "inconsistent use of tabs and spaces in indentation"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_TOODEEP: | 
					
						
							|  |  |  |             errtype = PyExc_IndentationError; | 
					
						
							|  |  |  |             msg = "too many levels of indentation"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_LINECONT: { | 
					
						
							|  |  |  |             msg = "unexpected character after line continuation character"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         default: | 
					
						
							|  |  |  |             msg = "unknown tokenization error"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     PyObject* errstr = NULL; | 
					
						
							|  |  |  |     PyObject* error_line = NULL; | 
					
						
							|  |  |  |     PyObject* tmp = NULL; | 
					
						
							|  |  |  |     PyObject* value = NULL; | 
					
						
							|  |  |  |     int result = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_ssize_t size = tok->inp - tok->buf; | 
					
						
							|  |  |  |     error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace"); | 
					
						
							|  |  |  |     if (!error_line) { | 
					
						
							|  |  |  |         result = -1; | 
					
						
							|  |  |  |         goto exit; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-22 13:30:18 +02:00
										 |  |  |     Py_ssize_t offset = _PyPegen_byte_offset_to_character_offset(error_line, tok->inp - tok->buf); | 
					
						
							|  |  |  |     if (offset == -1) { | 
					
						
							|  |  |  |         result = -1; | 
					
						
							|  |  |  |         goto exit; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     tmp = Py_BuildValue("(OnnOOO)", tok->filename, tok->lineno, offset, error_line, Py_None, Py_None); | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     if (!tmp) { | 
					
						
							|  |  |  |         result = -1; | 
					
						
							|  |  |  |         goto exit; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     errstr = PyUnicode_FromString(msg); | 
					
						
							|  |  |  |     if (!errstr) { | 
					
						
							|  |  |  |         result = -1; | 
					
						
							|  |  |  |         goto exit; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     value = PyTuple_Pack(2, errstr, tmp); | 
					
						
							|  |  |  |     if (!value) { | 
					
						
							|  |  |  |         result = -1; | 
					
						
							|  |  |  |         goto exit; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     PyErr_SetObject(errtype, value); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | exit: | 
					
						
							|  |  |  |     Py_XDECREF(errstr); | 
					
						
							|  |  |  |     Py_XDECREF(error_line); | 
					
						
							|  |  |  |     Py_XDECREF(tmp); | 
					
						
							|  |  |  |     Py_XDECREF(value); | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | static PyObject * | 
					
						
							|  |  |  | tokenizeriter_next(tokenizeriterobject *it) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     PyObject* result = NULL; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     struct token token; | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     _PyToken_Init(&token); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     int type = _PyTokenizer_Get(it->tok, &token); | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     if (type == ERRORTOKEN) { | 
					
						
							|  |  |  |         if(!PyErr_Occurred()) { | 
					
						
							|  |  |  |             _tokenizer_error(it->tok); | 
					
						
							|  |  |  |             assert(PyErr_Occurred()); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         goto exit; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     if (type == ERRORTOKEN || type == ENDMARKER) { | 
					
						
							|  |  |  |         PyErr_SetString(PyExc_StopIteration, "EOF"); | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         goto exit; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *str = NULL; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.start == NULL || token.end == NULL) { | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |         str = PyUnicode_FromString(""); | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |         str = PyUnicode_FromStringAndSize(token.start, token.end - token.start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     if (str == NULL) { | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         goto exit; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_ssize_t size = it->tok->inp - it->tok->buf; | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace"); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (line == NULL) { | 
					
						
							|  |  |  |         Py_DECREF(str); | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         goto exit; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start; | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; | 
					
						
							|  |  |  |     Py_ssize_t end_lineno = it->tok->lineno; | 
					
						
							|  |  |  |     Py_ssize_t col_offset = -1; | 
					
						
							|  |  |  |     Py_ssize_t end_col_offset = -1; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.start != NULL && token.start >= line_start) { | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         col_offset = _PyPegen_byte_offset_to_character_offset(line, token.start - line_start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.end != NULL && token.end >= it->tok->line_start) { | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |         end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-22 02:29:04 +02:00
										 |  |  |     if (it->tok->tok_extra_tokens) { | 
					
						
							|  |  |  |         // Necessary adjustments to match the original Python tokenize
 | 
					
						
							|  |  |  |         // implementation
 | 
					
						
							|  |  |  |         if (type > DEDENT && type < OP) { | 
					
						
							|  |  |  |             type = OP; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else if (type == ASYNC || type == AWAIT) { | 
					
						
							|  |  |  |             type = NAME; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else if (type == NEWLINE) { | 
					
						
							|  |  |  |             str = PyUnicode_FromString("\n"); | 
					
						
							|  |  |  |             end_col_offset++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line); | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  | exit: | 
					
						
							|  |  |  |     _PyToken_Free(&token); | 
					
						
							|  |  |  |     return result; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | tokenizeriter_dealloc(tokenizeriterobject *it) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyTypeObject *tp = Py_TYPE(it); | 
					
						
							| 
									
										
										
										
											2021-10-13 17:22:14 +02:00
										 |  |  |     _PyTokenizer_Free(it->tok); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     tp->tp_free(it); | 
					
						
							|  |  |  |     Py_DECREF(tp); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// Type slots for _tokenize.TokenizerIter.
static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},      // the iterator is its own __iter__
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};
					
						
							|  |  |  | 
 | 
					
						
// Spec used to create the TokenizerIter heap type during module exec.
static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | tokenizemodule_exec(PyObject *m) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     tokenize_state *state = get_tokenize_state(m); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (state == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (state->TokenizerIter == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (PyModule_AddType(m, state->TokenizerIter) < 0) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// The module exposes no module-level functions; only the sentinel entry.
static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};
					
						
							|  |  |  | 
 | 
					
						
// Multi-phase init slots: run exec, and declare per-interpreter GIL support.
static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}
};
					
						
							|  |  |  | 
 | 
					
						
// GC traversal: visit the single PyObject held in the module state.
// (Py_VISIT expands using the `visit` and `arg` parameter names.)
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | tokenizemodule_clear(PyObject *m) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     tokenize_state *state = get_tokenize_state(m); | 
					
						
							|  |  |  |     Py_CLEAR(state->TokenizerIter); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | tokenizemodule_free(void *m) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     tokenizemodule_clear((PyObject *)m); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// Module definition for the private _tokenize accelerator module.
static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),  // per-module state (holds TokenizerIter)
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};
					
						
							|  |  |  | 
 | 
					
						
// Module entry point (multi-phase initialization).
PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}