bpo-44667: Treat correctly lines ending with comments and no newlines in the Python tokenizer (GH-27499) (GH-27501)

(cherry picked from commit b6bde9fc42)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
Miss Islington (bot) 2021-08-02 02:44:01 -07:00 committed by GitHub
parent 2efa78180d
commit 2d11797c81
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 1 deletions

View file

@ -1457,6 +1457,16 @@ def test_pathological_trailing_whitespace(self):
# See http://bugs.python.org/issue16152
self.assertExactTypeEqual('@ ', token.AT)
def test_comment_at_the_end_of_the_source_without_newline(self):
# See http://bugs.python.org/issue44667
source = 'b = 1\n\n#test'
expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
for i in range(6):
self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
class UntokenizeTest(TestCase):

View file

@ -602,7 +602,7 @@ def _tokenize(readline, encoding):
pos += 1
# Add an implicit NEWLINE if the input doesn't end in one
if last_line and last_line[-1] not in '\r\n':
if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')

View file

@ -0,0 +1,4 @@
The :func:`tokenize.tokenize` doesn't incorrectly generate a ``NEWLINE``
token if the source doesn't end with a new line character but the last line
is a comment, as the function is already generating a ``NL`` token. Patch by
Pablo Galindo