mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Issue #22687: Fixed some corner cases in breaking words in textwrap.
Got rid of quadratic complexity in breaking long words.
This commit is contained in:
		
							parent
							
								
									b365a06a84
								
							
						
					
					
						commit
						72bd327db0
					
				
					 3 changed files with 38 additions and 4 deletions
				
			
		|  | @ -184,6 +184,16 @@ def test_hyphenated(self): | ||||||
|         self.check_wrap(text, 42, |         self.check_wrap(text, 42, | ||||||
|                         ["this-is-a-useful-feature-for-reformatting-", |                         ["this-is-a-useful-feature-for-reformatting-", | ||||||
|                          "posts-from-tim-peters'ly"]) |                          "posts-from-tim-peters'ly"]) | ||||||
|  |         # This test pins down current behavior, which is not a guaranteed part of the API. | ||||||
|  |         expect = ("this-|is-|a-|useful-|feature-|for-|" | ||||||
|  |                   "reformatting-|posts-|from-|tim-|peters'ly").split('|') | ||||||
|  |         self.check_wrap(text, 1, expect, break_long_words=False) | ||||||
|  |         self.check_split(text, expect) | ||||||
|  | 
 | ||||||
|  |         self.check_split('e-mail', ['e-mail']) | ||||||
|  |         self.check_split('Jelly-O', ['Jelly-O']) | ||||||
|  |         # This test pins down current behavior, which is not a guaranteed part of the API. | ||||||
|  |         self.check_split('half-a-crown', 'half-|a-|crown'.split('|')) | ||||||
| 
 | 
 | ||||||
|     def test_hyphenated_numbers(self): |     def test_hyphenated_numbers(self): | ||||||
|         # Test that hyphenated numbers (eg. dates) are not broken like words. |         # Test that hyphenated numbers (eg. dates) are not broken like words. | ||||||
|  | @ -195,6 +205,7 @@ def test_hyphenated_numbers(self): | ||||||
|                                    'released on 1994-02-15.']) |                                    'released on 1994-02-15.']) | ||||||
|         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.', |         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.', | ||||||
|                                    'Python 1.0.1 was released on 1994-02-15.']) |                                    'Python 1.0.1 was released on 1994-02-15.']) | ||||||
|  |         self.check_wrap(text, 1, text.split(), break_long_words=False) | ||||||
| 
 | 
 | ||||||
|         text = "I do all my shopping at 7-11." |         text = "I do all my shopping at 7-11." | ||||||
|         self.check_wrap(text, 25, ["I do all my shopping at", |         self.check_wrap(text, 25, ["I do all my shopping at", | ||||||
|  | @ -202,6 +213,7 @@ def test_hyphenated_numbers(self): | ||||||
|         self.check_wrap(text, 27, ["I do all my shopping at", |         self.check_wrap(text, 27, ["I do all my shopping at", | ||||||
|                                    "7-11."]) |                                    "7-11."]) | ||||||
|         self.check_wrap(text, 29, ["I do all my shopping at 7-11."]) |         self.check_wrap(text, 29, ["I do all my shopping at 7-11."]) | ||||||
|  |         self.check_wrap(text, 1, text.split(), break_long_words=False) | ||||||
| 
 | 
 | ||||||
|     def test_em_dash(self): |     def test_em_dash(self): | ||||||
|         # Test text with em-dashes |         # Test text with em-dashes | ||||||
|  | @ -326,6 +338,10 @@ def test_punct_hyphens(self): | ||||||
|         self.check_split("the ['wibble-wobble'] widget", |         self.check_split("the ['wibble-wobble'] widget", | ||||||
|                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget']) |                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget']) | ||||||
| 
 | 
 | ||||||
|  |         # This test pins down current behavior, which is not a guaranteed part of the API. | ||||||
|  |         self.check_split("what-d'you-call-it.", | ||||||
|  |                          "what-d'you-|call-|it.".split('|')) | ||||||
|  | 
 | ||||||
|     def test_funky_parens (self): |     def test_funky_parens (self): | ||||||
|         # Second part of SF bug #596434: long option strings inside |         # Second part of SF bug #596434: long option strings inside | ||||||
|         # parentheses. |         # parentheses. | ||||||
|  |  | ||||||
|  | @ -79,10 +79,25 @@ class TextWrapper: | ||||||
|     # splits into |     # splits into | ||||||
|     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! |     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! | ||||||
|     # (after stripping out empty strings). |     # (after stripping out empty strings). | ||||||
|     wordsep_re = re.compile( |     word_punct = r'[\w!"\'&.,?]' | ||||||
|         r'(\s+|'                                  # any whitespace |     letter = r'[^\d\W]' | ||||||
|         r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words |     wordsep_re = re.compile(r''' | ||||||
|         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash |         ( # any whitespace | ||||||
|  |           \s+ | ||||||
|  |         | # em-dash between words | ||||||
|  |           (?<=%(wp)s) -{2,} (?=\w) | ||||||
|  |         | # word, possibly hyphenated | ||||||
|  |           \S+? (?: | ||||||
|  |             # hyphenated word | ||||||
|  |               -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) | ||||||
|  |               (?= %(lt)s -? %(lt)s) | ||||||
|  |             | # end of word | ||||||
|  |               (?=\s|\Z) | ||||||
|  |             | # em-dash | ||||||
|  |               (?<=%(wp)s) (?=-{2,}\w) | ||||||
|  |             ) | ||||||
|  |         )''' % {'wp': word_punct, 'lt': letter}, re.VERBOSE) | ||||||
|  |     del word_punct, letter | ||||||
| 
 | 
 | ||||||
|     # This less funky little regex just split on recognized spaces. E.g. |     # This less funky little regex just split on recognized spaces. E.g. | ||||||
|     #   "Hello there -- you goof-ball, use the -b option!" |     #   "Hello there -- you goof-ball, use the -b option!" | ||||||
|  |  | ||||||
|  | @ -26,6 +26,9 @@ Core and Builtins | ||||||
| Library | Library | ||||||
| ------- | ------- | ||||||
| 
 | 
 | ||||||
|  | - Issue #22687: Fixed some corner cases in breaking words in textwrap. | ||||||
|  |   Got rid of quadratic complexity in breaking long words. | ||||||
|  | 
 | ||||||
| - Issue #20289: The copy module now uses pickle protocol 4 (PEP 3154) and | - Issue #20289: The copy module now uses pickle protocol 4 (PEP 3154) and | ||||||
|   supports copying of instances of classes whose __new__ method takes |   supports copying of instances of classes whose __new__ method takes | ||||||
|   keyword-only arguments. |   keyword-only arguments. | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka