mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	 428de65ca9
			
		
	
	
		428de65ca9
		
	
	
	
	
		
			
			generate_tokens has been renamed tokenize and now works with bytes rather than strings. A new detect_encoding function has been added for determining source file encoding according to PEP-0263. Token sequences returned by tokenize always start with an ENCODING token which specifies the encoding used to decode the file. This token is used to encode the output of untokenize back to bytes. Credit goes to Michael "I'm-going-to-name-my-first-child-unittest" Foord from Resolver Systems for this work.
		
			
				
	
	
		
			13 lines
		
	
	
	
		
			437 B
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			13 lines
		
	
	
	
		
			437 B
		
	
	
	
		
			Text
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | |
| # IMPORTANT: unlike the other test_tokenize-*.txt files, this file
 | |
| # does NOT have the utf-8 BOM signature '\xef\xbb\xbf' at the start
 | |
| # of it.  Make sure this is not added inadvertently by your editor
 | |
| # if any changes are made to this file!
 | |
| 
 | |
| # Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
 | |
| x = 'ЉЊЈЁЂ'
 | |
| def y():
 | |
|     """
 | |
|     And again in a comment.  ЉЊЈЁЂ
 | |
|     """
 | |
|     pass
 |