mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	#2659: add `break_on_hyphens` to TextWrapper.
				
					
				
			This commit is contained in:
		
							parent
							
								
									5b54887deb
								
							
						
					
					
						commit
						6f95ae55b1
					
				
					 4 changed files with 48 additions and 3 deletions
				
			
		|  | @ -41,6 +41,10 @@ instance and calling a single method on it.  That instance is not reused, so for | |||
| applications that wrap/fill many text strings, it will be more efficient for you | ||||
| to create your own :class:`TextWrapper` object. | ||||
| 
 | ||||
| Text is preferably wrapped on whitespaces and right after the hyphens in | ||||
| hyphenated words; only then will long words be broken if necessary, unless | ||||
| :attr:`TextWrapper.break_long_words` is set to false. | ||||
| 
 | ||||
| An additional utility function, :func:`dedent`, is provided to remove | ||||
| indentation from strings that have unwanted whitespace to the left of the text. | ||||
| 
 | ||||
|  | @ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text. | |||
|       than :attr:`width`.  (Long words will be put on a line by themselves, in | ||||
|       order to minimize the amount by which :attr:`width` is exceeded.) | ||||
| 
 | ||||
| 
 | ||||
|    .. attribute:: break_on_hyphens | ||||
| 
 | ||||
|       (default: ``True``) If true, wrapping will occur preferably on whitespaces | ||||
|       and right after hyphens in compound words, as is customary in English. | ||||
|       If false, only whitespaces will be considered as potentially good places | ||||
|       for line breaks, but you need to set :attr:`break_long_words` to false if | ||||
|       you want truly unbreakable words.  Default behaviour in previous versions | ||||
|       was to always allow breaking hyphenated words. | ||||
| 
 | ||||
|       .. versionadded:: 2.6 | ||||
| 
 | ||||
| 
 | ||||
|    :class:`TextWrapper` also provides two public methods, analogous to the | ||||
|    module-level convenience functions: | ||||
| 
 | ||||
| 
 | ||||
|    .. method:: wrap(text) | ||||
| 
 | ||||
|       Wraps the single paragraph in *text* (a string) so every line is at most | ||||
|  |  | |||
|  | @ -364,6 +364,14 @@ def test_split(self): | |||
|              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-", | ||||
|               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"]) | ||||
| 
 | ||||
|     def test_break_on_hyphens(self): | ||||
|         # Ensure that the break_on_hyphens attribute works | ||||
|         text = "yaba daba-doo" | ||||
|         self.check_wrap(text, 10, ["yaba daba-", "doo"], | ||||
|                         break_on_hyphens=True) | ||||
|         self.check_wrap(text, 10, ["yaba", "daba-doo"], | ||||
|                         break_on_hyphens=False) | ||||
| 
 | ||||
|     def test_bad_width(self): | ||||
|         # Ensure that width <= 0 is caught. | ||||
|         text = "Whatever, it doesn't matter." | ||||
|  |  | |||
|  | @ -63,6 +63,10 @@ class TextWrapper: | |||
|       break_long_words (default: true) | ||||
|         Break words longer than 'width'.  If false, those words will not | ||||
|         be broken, and some lines might be longer than 'width'. | ||||
|       break_on_hyphens (default: true) | ||||
|         Allow breaking hyphenated words. If true, wrapping will occur | ||||
|         preferably on whitespaces and right after hyphens that are part | ||||
|         of compound words. | ||||
|       drop_whitespace (default: true) | ||||
|         Drop leading and trailing whitespace from lines. | ||||
|     """ | ||||
|  | @ -85,6 +89,12 @@ class TextWrapper: | |||
|         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words | ||||
|         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash | ||||
| 
 | ||||
|     # This less funky little regex just splits on recognized spaces. E.g. | ||||
|     #   "Hello there -- you goof-ball, use the -b option!" | ||||
|     # splits into | ||||
|     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ | ||||
|     wordsep_simple_re = re.compile(r'(\s+)') | ||||
| 
 | ||||
|     # XXX this is not locale- or charset-aware -- string.lowercase | ||||
|     # is US-ASCII only (and therefore English-only) | ||||
|     sentence_end_re = re.compile(r'[%s]'              # lowercase letter | ||||
|  | @ -102,7 +112,8 @@ def __init__(self, | |||
|                  replace_whitespace=True, | ||||
|                  fix_sentence_endings=False, | ||||
|                  break_long_words=True, | ||||
|                  drop_whitespace=True): | ||||
|                  drop_whitespace=True, | ||||
|                  break_on_hyphens=True): | ||||
|         self.width = width | ||||
|         self.initial_indent = initial_indent | ||||
|         self.subsequent_indent = subsequent_indent | ||||
|  | @ -111,6 +122,7 @@ def __init__(self, | |||
|         self.fix_sentence_endings = fix_sentence_endings | ||||
|         self.break_long_words = break_long_words | ||||
|         self.drop_whitespace = drop_whitespace | ||||
|         self.break_on_hyphens = break_on_hyphens | ||||
| 
 | ||||
| 
 | ||||
|     # -- Private methods ----------------------------------------------- | ||||
|  | @ -143,8 +155,15 @@ def _split(self, text): | |||
|         breaks into the following chunks: | ||||
|           'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', | ||||
|           'use', ' ', 'the', ' ', '-b', ' ', 'option!' | ||||
|         if break_on_hyphens is True, or in: | ||||
|           'Look,', ' ', 'goof-ball', ' ', '--', ' ', | ||||
|           'use', ' ', 'the', ' ', '-b', ' ', 'option!' | ||||
|         otherwise. | ||||
|         """ | ||||
|         chunks = self.wordsep_re.split(text) | ||||
|         if self.break_on_hyphens is True: | ||||
|             chunks = self.wordsep_re.split(text) | ||||
|         else: | ||||
|             chunks = self.wordsep_simple_re.split(text) | ||||
|         chunks = filter(None, chunks)  # remove empty chunks | ||||
|         return chunks | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,6 +23,8 @@ Extension Modules | |||
| Library | ||||
| ------- | ||||
| 
 | ||||
| - #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class. | ||||
| 
 | ||||
| - The mhlib module has been deprecated for removal in Python 3.0. | ||||
| 
 | ||||
| - The linuxaudiodev module has been deprecated for removal in Python 3.0. | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Georg Brandl
						Georg Brandl