| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | #include "../Parser/tokenizer.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct PyModuleDef _tokenizemodule; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyTypeObject *TokenizerIter; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } tokenize_state; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | static tokenize_state * | 
					
						
							|  |  |  | get_tokenize_state(PyObject *module) { | 
					
						
							|  |  |  |     return (tokenize_state *)PyModule_GetState(module); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | #define _tokenize_get_state_by_type(type) \
 | 
					
						
							| 
									
										
										
										
											2022-02-11 17:22:11 +01:00
										 |  |  |     get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule)) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-11 15:25:49 -06:00
										 |  |  | #include "pycore_runtime.h"
 | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | #include "clinic/Python-tokenize.c.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*[clinic input]
 | 
					
						
							|  |  |  | module _tokenizer | 
					
						
							|  |  |  | class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter" | 
					
						
							|  |  |  | [clinic start generated code]*/ | 
					
						
							|  |  |  | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | typedef struct | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PyObject_HEAD struct tok_state *tok; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } tokenizeriterobject; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*[clinic input]
 | 
					
						
							|  |  |  | @classmethod | 
					
						
							|  |  |  | _tokenizer.tokenizeriter.__new__ as tokenizeriter_new | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     source: str | 
					
						
							|  |  |  | [clinic start generated code]*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | tokenizeriter_new_impl(PyTypeObject *type, const char *source) | 
					
						
							|  |  |  | /*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/ | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (self == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *filename = PyUnicode_FromString("<string>"); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (filename == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-10-13 17:22:14 +02:00
										 |  |  |     self->tok = _PyTokenizer_FromUTF8(source, 1); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (self->tok == NULL) { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |         Py_DECREF(filename); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     self->tok->filename = filename; | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     return (PyObject *)self; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | static PyObject * | 
					
						
							|  |  |  | tokenizeriter_next(tokenizeriterobject *it) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     struct token token; | 
					
						
							|  |  |  |     int type = _PyTokenizer_Get(it->tok, &token); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (type == ERRORTOKEN && PyErr_Occurred()) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (type == ERRORTOKEN || type == ENDMARKER) { | 
					
						
							|  |  |  |         PyErr_SetString(PyExc_StopIteration, "EOF"); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *str = NULL; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.start == NULL || token.end == NULL) { | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |         str = PyUnicode_FromString(""); | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |         str = PyUnicode_FromStringAndSize(token.start, token.end - token.start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     if (str == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_ssize_t size = it->tok->inp - it->tok->buf; | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace"); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (line == NULL) { | 
					
						
							|  |  |  |         Py_DECREF(str); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start; | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno; | 
					
						
							|  |  |  |     int end_lineno = it->tok->lineno; | 
					
						
							|  |  |  |     int col_offset = -1; | 
					
						
							|  |  |  |     int end_col_offset = -1; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.start != NULL && token.start >= line_start) { | 
					
						
							|  |  |  |         col_offset = (int)(token.start - line_start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     if (token.end != NULL && token.end >= it->tok->line_start) { | 
					
						
							|  |  |  |         end_col_offset = (int)(token.end - it->tok->line_start); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | tokenizeriter_dealloc(tokenizeriterobject *it) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyTypeObject *tp = Py_TYPE(it); | 
					
						
							| 
									
										
										
										
											2021-10-13 17:22:14 +02:00
										 |  |  |     _PyTokenizer_Free(it->tok); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     tp->tp_free(it); | 
					
						
							|  |  |  |     Py_DECREF(tp); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyType_Slot tokenizeriter_slots[] = { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     {Py_tp_new, tokenizeriter_new}, | 
					
						
							|  |  |  |     {Py_tp_dealloc, tokenizeriter_dealloc}, | 
					
						
							|  |  |  |     {Py_tp_getattro, PyObject_GenericGetAttr}, | 
					
						
							|  |  |  |     {Py_tp_iter, PyObject_SelfIter}, | 
					
						
							|  |  |  |     {Py_tp_iternext, tokenizeriter_next}, | 
					
						
							|  |  |  |     {0, NULL}, | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyType_Spec tokenizeriter_spec = { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     .name = "_tokenize.TokenizerIter", | 
					
						
							|  |  |  |     .basicsize = sizeof(tokenizeriterobject), | 
					
						
							|  |  |  |     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), | 
					
						
							|  |  |  |     .slots = tokenizeriter_slots, | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  | tokenizemodule_exec(PyObject *m) | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     tokenize_state *state = get_tokenize_state(m); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (state == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL); | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     if (state->TokenizerIter == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (PyModule_AddType(m, state->TokenizerIter) < 0) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyMethodDef tokenize_methods[] = { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     {NULL, NULL, 0, NULL} /* Sentinel */ | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyModuleDef_Slot tokenizemodule_slots[] = { | 
					
						
							| 
									
										
										
										
											2021-10-03 16:58:14 +03:00
										 |  |  |     {Py_mod_exec, tokenizemodule_exec}, | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  |     {0, NULL} | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     tokenize_state *state = get_tokenize_state(m); | 
					
						
							|  |  |  |     Py_VISIT(state->TokenizerIter); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | tokenizemodule_clear(PyObject *m) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     tokenize_state *state = get_tokenize_state(m); | 
					
						
							|  |  |  |     Py_CLEAR(state->TokenizerIter); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | tokenizemodule_free(void *m) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     tokenizemodule_clear((PyObject *)m); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct PyModuleDef _tokenizemodule = { | 
					
						
							| 
									
										
										
										
											2021-08-25 13:41:14 +01:00
										 |  |  |     PyModuleDef_HEAD_INIT, | 
					
						
							|  |  |  |     .m_name = "_tokenize", | 
					
						
							|  |  |  |     .m_size = sizeof(tokenize_state), | 
					
						
							|  |  |  |     .m_slots = tokenizemodule_slots, | 
					
						
							|  |  |  |     .m_methods = tokenize_methods, | 
					
						
							|  |  |  |     .m_traverse = tokenizemodule_traverse, | 
					
						
							|  |  |  |     .m_clear = tokenizemodule_clear, | 
					
						
							|  |  |  |     .m_free = tokenizemodule_free, | 
					
						
							| 
									
										
										
										
											2021-08-24 17:50:05 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PyMODINIT_FUNC | 
					
						
							|  |  |  | PyInit__tokenize(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return PyModuleDef_Init(&_tokenizemodule); | 
					
						
							|  |  |  | } |