Serhiy Storchaka 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5c942f11cd 
								
							 
						 
						
							
							
								
								gh-63161: Fix PEP 263 support (GH-139481)  
							
							... 
							
							
							
							* Support non-UTF-8 shebang and comments if non-UTF-8 encoding is specified.
* Detect decoding error in comments for UTF-8 encoding.
* Include the decoding error position for default encoding in SyntaxError. 
							
						 
						
							2025-10-10 12:51:19 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								56eda25633 
								
							 
						 
						
							
							
								
								gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer ( #116049 )  
							
							
							
						 
						
							2025-02-13 01:07:37 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Victor Stinner 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9a8ca0a6a 
								
							 
						 
						
							
							
								
								gh-115754: Use Py_GetConstant(Py_CONSTANT_EMPTY_STR) ( #125194 )  
							
							... 
							
							
							
							Replace PyUnicode_New(0, 0), PyUnicode_FromString("")
and PyUnicode_FromStringAndSize("", 0)
with Py_GetConstant(Py_CONSTANT_EMPTY_STR). 
							
						 
						
							2024-10-09 17:15:23 +02:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Xie Yanbo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2a5d1eb707 
								
							 
						 
						
							
							
								
								Fix typos in comments and exception message ( #122147 )  
							
							
							
						 
						
							2024-07-23 14:34:14 +05:30 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								015b97d19a 
								
							 
						 
						
							
							
								
								gh-115823: Calculate correctly error locations when dealing with implicit encodings ( #115824 )  
							
							
							
						 
						
							2024-02-26 12:57:09 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9ed36d533a 
								
							 
						 
						
							
							
								
								gh-113602: Bail out when the parser tries to override existing errors ( #113607 )  
							
							... 
							
							
							
							Signed-off-by: Pablo Galindo <pablogsal@gmail.com> 
							
						 
						
							2024-01-02 13:00:52 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Yang Hau 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								707c37e373 
								
							 
						 
						
							
							
								
								Fix typos in variable names, function names, and comments (GH-101868)  
							
							
							
						 
						
							2023-12-01 09:37:40 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45d648597b 
								
							 
						 
						
							
							
								
								gh-112387: Fix error positions for decoded strings with backwards tokenize errors ( #112409 )  
							
							... 
							
							
							
							Signed-off-by: Pablo Galindo <pablogsal@gmail.com> 
							
						 
						
							2023-11-27 18:37:48 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2c8b191742 
								
							 
						 
						
							
							
								
								gh-112388: Fix an error that was causing the parser to try to overwrite tokenizer errors ( #112410 )  
							
							... 
							
							
							
							Signed-off-by: Pablo Galindo <pablogsal@gmail.com> 
							
						 
						
							2023-11-27 18:36:11 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f46333b9f5 
								
							 
						 
						
							
							
								
								gh-107450: Remove unnecessary overflow check in parser error handler ( #110940 )  
							
							
							
						 
						
							2023-10-16 22:41:01 +02:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1ac5590e0 
								
							 
						 
						
							
							
								
								gh-107450: Check for overflow in the tokenizer and fix overflow test ( #110832 )  
							
							... 
							
							
							
							Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com> 
							
						 
						
							2023-10-16 16:42:49 +02:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fb7843ee89 
								
							 
						 
						
							
							
								
								gh-107450: Raise OverflowError when parser column offset overflows ( #110754 )  
							
							
							
						 
						
							2023-10-12 09:34:12 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01481f2dc1 
								
							 
						 
						
							
							
								
								gh-104169: Refactor tokenizer into lexer and wrappers ( #110684 )  
							
							... 
							
							
							
							* The lexer, which includes the actual lexeme-producing logic, goes into
  the `lexer` directory.
* The wrappers, one wrapper per input mode (file, string, utf-8, and
  readline), go into the `tokenizer` directory and include logic for
  creating a lexer instance and managing the buffer for different modes.
---------
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> 
							
						 
						
							2023-10-11 15:14:44 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b28ffaa193 
								
							 
						 
						
							
							
								
								gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords ( #109606 )  
							
							
							
						 
						
							2023-09-22 19:03:23 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Dennis Sweeney 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								86617518c4 
								
							 
						 
						
							
							
								
								gh-108179: Add error message for parser stack overflows ( #108256 )  
							
							
							
						 
						
							2023-08-22 08:41:50 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Victor Stinner 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c5afc97fc2 
								
							 
						 
						
							
							
								
								gh-106320: Remove private _PyErr C API functions ( #106356 )  
							
							... 
							
							
							
							Remove private _PyErr C API functions: move them to the internal
C API (pycore_pyerrors.h). 
							
						 
						
							2023-07-03 10:48:50 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Marta Gómez Macías 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6715f91edc 
								
							 
						 
						
							
							
								
								gh-102856: Python tokenizer implementation for PEP 701 ( #104323 )  
							
							... 
							
							
							
							This commit replaces the Python implementation of the tokenize module with an implementation
that reuses the real C tokenizer via a private extension module. The tokenize module now implements
a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward
compatibility.
As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer that is currently only used via
the extension module that exposes it to the Python layer. This new mode forces the C tokenizer to emit these new extra tokens and add the appropriate metadata that is needed to match the old Python implementation.
Co-authored-by: Pablo Galindo <pablogsal@gmail.com> 
							
						 
						
							2023-05-21 01:03:02 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9169a56fad 
								
							 
						 
						
							
							
								
								gh-103656: Transfer f-string buffers to parser to avoid use-after-free (GH-103896)  
							
							... 
							
							
							
							Co-authored-by: Pablo Galindo <pablogsal@gmail.com> 
							
						 
						
							2023-04-27 01:33:31 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1ef61cf71a 
								
							 
						 
						
							
							
								
								gh-102856: Initial implementation of PEP 701 ( #102855 )  
							
							... 
							
							
							
							Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Co-authored-by: Marta Gómez Macías <mgmacias@google.com>
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> 
							
						 
						
							2023-04-19 11:18:16 -05:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								97e7004cfe 
								
							 
						 
						
							
							
								
								gh-100050: Fix an assertion error when raising unclosed parenthesis errors in the tokenizer (GH-100065)  
							
							... 
							
							
							
							Automerge-Triggered-By: GH:pablogsal 
							
						 
						
							2022-12-06 15:09:56 -08:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Lysandros Nikolaou 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cbf0afd8a1 
								
							 
						 
						
							
							
								
								gh-97973: Return all necessary information from the tokenizer (GH-97984)  
							
							... 
							
							
							
							Right now, the tokenizer only returns type and two pointers to the start and end of the token.
This PR modifies the tokenizer to return the type and set all of the necessary information,
so that the parser does not have to do this. 
							
						 
						
							2022-10-06 16:07:17 -07:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Christian Heimes 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b4c857d0fd 
								
							 
						 
						
							
							
								
								gh-95876: Fix format string in pegen error location code ( #95877 )  
							
							
							
						 
						
							2022-08-11 09:55:57 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Paul m. p. Peny 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bbb2ab70b6 
								
							 
						 
						
							
							
								
								[3.11] bpo-14916: interactive fd is not tied to stdin [type-bug] ( #91469 )  
							
							... 
							
							
							
							* bpo-14916: interactive fd is not always stdin
related to https://github.com/python/cpython/pull/31006  merged bugfix
following https://bugs.python.org/issue14916 
* 📜 🤖  Added by blurb_it.
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> 
							
						 
						
							2022-07-16 09:35:19 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								36fcde61ba 
								
							 
						 
						
							
							
								
								gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin ( #94386 )  
							
							... 
							
							
							
							* gh-94360: Fix a tokenizer crash when reading encoded files with syntax errors from stdin
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
* nitty nit
Co-authored-by: Łukasz Langa <lukasz@langa.pl> 
							
						 
						
							2022-07-05 17:39:21 +01:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								26cca8067b 
								
							 
						 
						
							
							
								
								bpo-47117: Don't crash if we fail to decode characters when the tokenizer buffers are uninitialized (GH-32129)  
							
							... 
							
							
							
							Automerge-Triggered-By: GH:pablogsal 
							
						 
						
							2022-03-26 09:29:02 -07:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								650720a0cf 
								
							 
						 
						
							
							
								
								Fix the caret position in some syntax errors in interactive mode (GH-30718)  
							
							
							
						 
						
							2022-01-20 15:34:13 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8c2fd09f36 
								
							 
						 
						
							
							
								
								bpo-46339: Include clarification on assert in 'get_error_line_from_tokenizer_buffers' ( #30545 )  
							
							
							
						 
						
							2022-01-18 11:13:00 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cedec19be8 
								
							 
						 
						
							
							
								
								bpo-46339: Fix crash in the parser when computing error text for multi-line f-strings (GH-30529)  
							
							... 
							
							
							
							Automerge-Triggered-By: GH:pablogsal 
							
						 
						
							2022-01-11 08:30:39 -08:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								70f415fb8b 
								
							 
						 
						
							
							
								
								bpo-46240: Correct the error for unclosed parentheses when the tokenizer is not finished (GH-30378)  
							
							
							
						 
						
							2022-01-04 10:41:22 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								24c10d2943 
								
							 
						 
						
							
							
								
								bpo-45727: Only trigger the 'did you forget a comma' error suggestion if inside parentheses (GH-29757)  
							
							
							
						 
						
							2021-11-24 22:21:23 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4f006a789a 
								
							 
						 
						
							
							
								
								Ensure the str member of the tokenizer is always initialised (GH-29681)  
							
							
							
						 
						
							2021-11-21 02:06:39 +00:00 
							
								 
							
							
								 
							
						 
					 
				
					
						
							
								
								
									Pablo Galindo Salgado 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c9c4444d9f 
								
							 
						 
						
							
							
								
								Refactor parser compilation units into specific components (GH-29676)  
							
							
							
						 
						
							2021-11-21 01:08:50 +00:00