mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	bpo-28002: Roundtrip f-strings with ast.unparse better (GH-19612) (GH-23430)
By attempting to avoid backslashes in f-string expressions.
We also now proactively raise errors for some backslashes we can't
avoid while unparsing FormattedValues
Co-authored-by: hauntsaninja <>
Co-authored-by: Shantanu <hauntsaninja@users.noreply.github.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
(cherry picked from commit a993e901eb)
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
			
			
This commit is contained in:
		
							parent
							
								
									f552f4b2d6
								
							
						
					
					
						commit
						3763cc1dbd
					
				
					 2 changed files with 115 additions and 37 deletions
				
			
		
							
								
								
									
										108
									
								
								Lib/ast.py
									
										
									
									
									
								
							
							
						
						
									
										108
									
								
								Lib/ast.py
									
										
									
									
									
								
							|  | @ -662,17 +662,23 @@ def next(self): | ||||||
|         except ValueError: |         except ValueError: | ||||||
|             return self |             return self | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | _SINGLE_QUOTES = ("'", '"') | ||||||
|  | _MULTI_QUOTES = ('"""', "'''") | ||||||
|  | _ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) | ||||||
|  | 
 | ||||||
| class _Unparser(NodeVisitor): | class _Unparser(NodeVisitor): | ||||||
|     """Methods in this class recursively traverse an AST and |     """Methods in this class recursively traverse an AST and | ||||||
|     output source code for the abstract syntax; original formatting |     output source code for the abstract syntax; original formatting | ||||||
|     is disregarded.""" |     is disregarded.""" | ||||||
| 
 | 
 | ||||||
|     def __init__(self): |     def __init__(self, *, _avoid_backslashes=False): | ||||||
|         self._source = [] |         self._source = [] | ||||||
|         self._buffer = [] |         self._buffer = [] | ||||||
|         self._precedences = {} |         self._precedences = {} | ||||||
|         self._type_ignores = {} |         self._type_ignores = {} | ||||||
|         self._indent = 0 |         self._indent = 0 | ||||||
|  |         self._avoid_backslashes = _avoid_backslashes | ||||||
| 
 | 
 | ||||||
|     def interleave(self, inter, f, seq): |     def interleave(self, inter, f, seq): | ||||||
|         """Call f on each item in seq, calling inter() in between.""" |         """Call f on each item in seq, calling inter() in between.""" | ||||||
|  | @ -1067,15 +1073,85 @@ def visit_AsyncWith(self, node): | ||||||
|         with self.block(extra=self.get_type_comment(node)): |         with self.block(extra=self.get_type_comment(node)): | ||||||
|             self.traverse(node.body) |             self.traverse(node.body) | ||||||
| 
 | 
 | ||||||
|  |     def _str_literal_helper( | ||||||
|  |         self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False | ||||||
|  |     ): | ||||||
|  |         """Helper for writing string literals, minimizing escapes. | ||||||
|  |         Returns the tuple (string literal to write, possible quote types). | ||||||
|  |         """ | ||||||
|  |         def escape_char(c): | ||||||
|  |             # \n and \t are non-printable, but we only escape them if | ||||||
|  |             # escape_special_whitespace is True | ||||||
|  |             if not escape_special_whitespace and c in "\n\t": | ||||||
|  |                 return c | ||||||
|  |             # Always escape backslashes and other non-printable characters | ||||||
|  |             if c == "\\" or not c.isprintable(): | ||||||
|  |                 return c.encode("unicode_escape").decode("ascii") | ||||||
|  |             return c | ||||||
|  | 
 | ||||||
|  |         escaped_string = "".join(map(escape_char, string)) | ||||||
|  |         possible_quotes = quote_types | ||||||
|  |         if "\n" in escaped_string: | ||||||
|  |             possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] | ||||||
|  |         possible_quotes = [q for q in possible_quotes if q not in escaped_string] | ||||||
|  |         if not possible_quotes: | ||||||
|  |             # If there aren't any possible_quotes, fallback to using repr | ||||||
|  |             # on the original string. Try to use a quote from quote_types, | ||||||
|  |             # e.g., so that we use triple quotes for docstrings. | ||||||
|  |             string = repr(string) | ||||||
|  |             quote = next((q for q in quote_types if string[0] in q), string[0]) | ||||||
|  |             return string[1:-1], [quote] | ||||||
|  |         if escaped_string: | ||||||
|  |             # Sort so that we prefer '''"''' over """\"""" | ||||||
|  |             possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) | ||||||
|  |             # If we're using triple quotes and we'd need to escape a final | ||||||
|  |             # quote, escape it | ||||||
|  |             if possible_quotes[0][0] == escaped_string[-1]: | ||||||
|  |                 assert len(possible_quotes[0]) == 3 | ||||||
|  |                 escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] | ||||||
|  |         return escaped_string, possible_quotes | ||||||
|  | 
 | ||||||
|  |     def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): | ||||||
|  |         """Write string literal value with a best effort attempt to avoid backslashes.""" | ||||||
|  |         string, quote_types = self._str_literal_helper(string, quote_types=quote_types) | ||||||
|  |         quote_type = quote_types[0] | ||||||
|  |         self.write(f"{quote_type}{string}{quote_type}") | ||||||
|  | 
 | ||||||
|     def visit_JoinedStr(self, node): |     def visit_JoinedStr(self, node): | ||||||
|         self.write("f") |         self.write("f") | ||||||
|  |         if self._avoid_backslashes: | ||||||
|             self._fstring_JoinedStr(node, self.buffer_writer) |             self._fstring_JoinedStr(node, self.buffer_writer) | ||||||
|         self.write(repr(self.buffer)) |             self._write_str_avoiding_backslashes(self.buffer) | ||||||
|  |             return | ||||||
|  | 
 | ||||||
|  |         # If we don't need to avoid backslashes globally (i.e., we only need | ||||||
|  |         # to avoid them inside FormattedValues), it's cosmetically preferred | ||||||
|  |         # to use escaped whitespace. That is, it's preferred to use backslashes | ||||||
|  |         # for cases like: f"{x}\n". To accomplish this, we keep track of what | ||||||
|  |         # in our buffer corresponds to FormattedValues and what corresponds to | ||||||
|  |         # Constant parts of the f-string, and allow escapes accordingly. | ||||||
|  |         buffer = [] | ||||||
|  |         for value in node.values: | ||||||
|  |             meth = getattr(self, "_fstring_" + type(value).__name__) | ||||||
|  |             meth(value, self.buffer_writer) | ||||||
|  |             buffer.append((self.buffer, isinstance(value, Constant))) | ||||||
|  |         new_buffer = [] | ||||||
|  |         quote_types = _ALL_QUOTES | ||||||
|  |         for value, is_constant in buffer: | ||||||
|  |             # Repeatedly narrow down the list of possible quote_types | ||||||
|  |             value, quote_types = self._str_literal_helper( | ||||||
|  |                 value, quote_types=quote_types, | ||||||
|  |                 escape_special_whitespace=is_constant | ||||||
|  |             ) | ||||||
|  |             new_buffer.append(value) | ||||||
|  |         value = "".join(new_buffer) | ||||||
|  |         quote_type = quote_types[0] | ||||||
|  |         self.write(f"{quote_type}{value}{quote_type}") | ||||||
| 
 | 
 | ||||||
|     def visit_FormattedValue(self, node): |     def visit_FormattedValue(self, node): | ||||||
|         self.write("f") |         self.write("f") | ||||||
|         self._fstring_FormattedValue(node, self.buffer_writer) |         self._fstring_FormattedValue(node, self.buffer_writer) | ||||||
|         self.write(repr(self.buffer)) |         self._write_str_avoiding_backslashes(self.buffer) | ||||||
| 
 | 
 | ||||||
|     def _fstring_JoinedStr(self, node, write): |     def _fstring_JoinedStr(self, node, write): | ||||||
|         for value in node.values: |         for value in node.values: | ||||||
|  | @ -1090,11 +1166,13 @@ def _fstring_Constant(self, node, write): | ||||||
| 
 | 
 | ||||||
|     def _fstring_FormattedValue(self, node, write): |     def _fstring_FormattedValue(self, node, write): | ||||||
|         write("{") |         write("{") | ||||||
|         unparser = type(self)() |         unparser = type(self)(_avoid_backslashes=True) | ||||||
|         unparser.set_precedence(_Precedence.TEST.next(), node.value) |         unparser.set_precedence(_Precedence.TEST.next(), node.value) | ||||||
|         expr = unparser.visit(node.value) |         expr = unparser.visit(node.value) | ||||||
|         if expr.startswith("{"): |         if expr.startswith("{"): | ||||||
|             write(" ")  # Separate pair of opening brackets as "{ {" |             write(" ")  # Separate pair of opening brackets as "{ {" | ||||||
|  |         if "\\" in expr: | ||||||
|  |             raise ValueError("Unable to avoid backslash in f-string expression part") | ||||||
|         write(expr) |         write(expr) | ||||||
|         if node.conversion != -1: |         if node.conversion != -1: | ||||||
|             conversion = chr(node.conversion) |             conversion = chr(node.conversion) | ||||||
|  | @ -1111,33 +1189,17 @@ def visit_Name(self, node): | ||||||
|         self.write(node.id) |         self.write(node.id) | ||||||
| 
 | 
 | ||||||
|     def _write_docstring(self, node): |     def _write_docstring(self, node): | ||||||
|         def esc_char(c): |  | ||||||
|             if c in ("\n", "\t"): |  | ||||||
|                 # In the AST form, we don't know the author's intentation |  | ||||||
|                 # about how this should be displayed. We'll only escape |  | ||||||
|                 # \n and \t, because they are more likely to be unescaped |  | ||||||
|                 # in the source |  | ||||||
|                 return c |  | ||||||
|             return c.encode('unicode_escape').decode('ascii') |  | ||||||
| 
 |  | ||||||
|         self.fill() |         self.fill() | ||||||
|         if node.kind == "u": |         if node.kind == "u": | ||||||
|             self.write("u") |             self.write("u") | ||||||
| 
 |         self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) | ||||||
|         value = node.value |  | ||||||
|         if value: |  | ||||||
|             # Preserve quotes in the docstring by escaping them |  | ||||||
|             value = "".join(map(esc_char, value)) |  | ||||||
|             if value[-1] == '"': |  | ||||||
|                 value = value.replace('"', '\\"', -1) |  | ||||||
|             value = value.replace('"""', '""\\"') |  | ||||||
| 
 |  | ||||||
|         self.write(f'"""{value}"""') |  | ||||||
| 
 | 
 | ||||||
|     def _write_constant(self, value): |     def _write_constant(self, value): | ||||||
|         if isinstance(value, (float, complex)): |         if isinstance(value, (float, complex)): | ||||||
|             # Substitute overflowing decimal literal for AST infinities. |             # Substitute overflowing decimal literal for AST infinities. | ||||||
|             self.write(repr(value).replace("inf", _INFSTR)) |             self.write(repr(value).replace("inf", _INFSTR)) | ||||||
|  |         elif self._avoid_backslashes and isinstance(value, str): | ||||||
|  |             self._write_str_avoiding_backslashes(value) | ||||||
|         else: |         else: | ||||||
|             self.write(repr(value)) |             self.write(repr(value)) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -152,6 +152,18 @@ def test_fstrings(self): | ||||||
|         # See issue 25180 |         # See issue 25180 | ||||||
|         self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""") |         self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""") | ||||||
|         self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""") |         self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""") | ||||||
|  |         self.check_ast_roundtrip("""f''""") | ||||||
|  |         self.check_ast_roundtrip('''f"""'end' "quote\\""""''') | ||||||
|  | 
 | ||||||
|  |     def test_fstrings_complicated(self): | ||||||
|  |         # See issue 28002 | ||||||
|  |         self.check_ast_roundtrip("""f'''{"'"}'''""") | ||||||
|  |         self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') | ||||||
|  |         self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''') | ||||||
|  |         self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""') | ||||||
|  |         self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""') | ||||||
|  |         self.check_ast_roundtrip('''f"a\\r\\nb"''') | ||||||
|  |         self.check_ast_roundtrip('''f"\\u2028{'x'}"''') | ||||||
| 
 | 
 | ||||||
|     def test_strings(self): |     def test_strings(self): | ||||||
|         self.check_ast_roundtrip("u'foo'") |         self.check_ast_roundtrip("u'foo'") | ||||||
|  | @ -311,6 +323,9 @@ def test_invalid_fstring_conversion(self): | ||||||
|             ) |             ) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     def test_invalid_fstring_backslash(self): | ||||||
|  |         self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\"))) | ||||||
|  | 
 | ||||||
|     def test_invalid_set(self): |     def test_invalid_set(self): | ||||||
|         self.check_invalid(ast.Set(elts=[])) |         self.check_invalid(ast.Set(elts=[])) | ||||||
| 
 | 
 | ||||||
|  | @ -330,8 +345,8 @@ def test_docstrings(self): | ||||||
|             '\r\\r\t\\t\n\\n', |             '\r\\r\t\\t\n\\n', | ||||||
|             '""">>> content = \"\"\"blabla\"\"\" <<<"""', |             '""">>> content = \"\"\"blabla\"\"\" <<<"""', | ||||||
|             r'foo\n\x00', |             r'foo\n\x00', | ||||||
|             '🐍⛎𩸽üéş^\N{LONG RIGHTWARDS SQUIGGLE ARROW}' |             "' \\'\\'\\'\"\"\" \"\"\\'\\' \\'", | ||||||
| 
 |             '🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}' | ||||||
|         ) |         ) | ||||||
|         for docstring in docstrings: |         for docstring in docstrings: | ||||||
|             # check as Module docstrings for easy testing |             # check as Module docstrings for easy testing | ||||||
|  | @ -416,7 +431,6 @@ def test_simple_expressions_parens(self): | ||||||
|         self.check_src_roundtrip("call((yield x))") |         self.check_src_roundtrip("call((yield x))") | ||||||
|         self.check_src_roundtrip("return x + (yield x)") |         self.check_src_roundtrip("return x + (yield x)") | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
|     def test_class_bases_and_keywords(self): |     def test_class_bases_and_keywords(self): | ||||||
|         self.check_src_roundtrip("class X:\n    pass") |         self.check_src_roundtrip("class X:\n    pass") | ||||||
|         self.check_src_roundtrip("class X(A):\n    pass") |         self.check_src_roundtrip("class X(A):\n    pass") | ||||||
|  | @ -429,6 +443,13 @@ def test_class_bases_and_keywords(self): | ||||||
|         self.check_src_roundtrip("class X(*args):\n    pass") |         self.check_src_roundtrip("class X(*args):\n    pass") | ||||||
|         self.check_src_roundtrip("class X(*args, **kwargs):\n    pass") |         self.check_src_roundtrip("class X(*args, **kwargs):\n    pass") | ||||||
| 
 | 
 | ||||||
|  |     def test_fstrings(self): | ||||||
|  |         self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') | ||||||
|  |         self.check_src_roundtrip('''f"\\u2028{'x'}"''') | ||||||
|  |         self.check_src_roundtrip(r"f'{x}\n'") | ||||||
|  |         self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''') | ||||||
|  |         self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''') | ||||||
|  | 
 | ||||||
|     def test_docstrings(self): |     def test_docstrings(self): | ||||||
|         docstrings = ( |         docstrings = ( | ||||||
|             '"""simple doc string"""', |             '"""simple doc string"""', | ||||||
|  | @ -443,6 +464,10 @@ def test_docstrings(self): | ||||||
|             '""""""', |             '""""""', | ||||||
|             '"""\'\'\'"""', |             '"""\'\'\'"""', | ||||||
|             '"""\'\'\'\'\'\'"""', |             '"""\'\'\'\'\'\'"""', | ||||||
|  |             '"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""', | ||||||
|  |             '"""end in single \'quote\'"""', | ||||||
|  |             "'''end in double \"quote\"'''", | ||||||
|  |             '"""almost end in double "quote"."""', | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|         for prefix in docstring_prefixes: |         for prefix in docstring_prefixes: | ||||||
|  | @ -483,9 +508,8 @@ class DirectoryTestCase(ASTTestCase): | ||||||
| 
 | 
 | ||||||
|     lib_dir = pathlib.Path(__file__).parent / ".." |     lib_dir = pathlib.Path(__file__).parent / ".." | ||||||
|     test_directories = (lib_dir, lib_dir / "test") |     test_directories = (lib_dir, lib_dir / "test") | ||||||
|     skip_files = {"test_fstring.py"} |  | ||||||
|     run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", |     run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", | ||||||
|                         "test_ast.py", "test_asdl_parser.py"} |                         "test_ast.py", "test_asdl_parser.py", "test_fstring.py"} | ||||||
| 
 | 
 | ||||||
|     _files_to_test = None |     _files_to_test = None | ||||||
| 
 | 
 | ||||||
|  | @ -525,14 +549,6 @@ def test_files(self): | ||||||
|             if test.support.verbose: |             if test.support.verbose: | ||||||
|                 print(f"Testing {item.absolute()}") |                 print(f"Testing {item.absolute()}") | ||||||
| 
 | 
 | ||||||
|             # Some f-strings are not correctly round-tripped by |  | ||||||
|             # Tools/parser/unparse.py.  See issue 28002 for details. |  | ||||||
|             # We need to skip files that contain such f-strings. |  | ||||||
|             if item.name in self.skip_files: |  | ||||||
|                 if test.support.verbose: |  | ||||||
|                     print(f"Skipping {item.absolute()}: see issue 28002") |  | ||||||
|                 continue |  | ||||||
| 
 |  | ||||||
|             with self.subTest(filename=item): |             with self.subTest(filename=item): | ||||||
|                 source = read_pyfile(item) |                 source = read_pyfile(item) | ||||||
|                 self.check_ast_roundtrip(source) |                 self.check_ast_roundtrip(source) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue