mirror of
				https://github.com/python/cpython.git
				synced 2025-10-29 20:51:26 +00:00 
			
		
		
		
	Follow-up to r67746 in order to restore backwards-compatibility for
those who (monkey-)patch TextWrapper.wordsep_re with a custom RE.
This commit is contained in:
		
							parent
							
								
									8d5934b25d
								
							
						
					
					
						commit
						3eef441700
					
				
					 1 changed files with 19 additions and 7 deletions
				
			
		|  | @ -84,7 +84,7 @@ class TextWrapper: | |||
|     # splits into | ||||
|     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! | ||||
|     # (after stripping out empty strings). | ||||
|     wordsep_re = ( | ||||
|     wordsep_re = re.compile( | ||||
|         r'(\s+|'                                  # any whitespace | ||||
|         r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words | ||||
|         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash | ||||
|  | @ -93,7 +93,7 @@ class TextWrapper: | |||
|     #   "Hello there -- you goof-ball, use the -b option!" | ||||
|     # splits into | ||||
|     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ | ||||
|     wordsep_simple_re = r'(\s+)' | ||||
|     wordsep_simple_re = re.compile(r'(\s+)') | ||||
| 
 | ||||
|     # XXX this is not locale- or charset-aware -- string.lowercase | ||||
|     # is US-ASCII only (and therefore English-only) | ||||
|  | @ -124,6 +124,13 @@ def __init__(self, | |||
|         self.drop_whitespace = drop_whitespace | ||||
|         self.break_on_hyphens = break_on_hyphens | ||||
| 
 | ||||
|         # recompile the regexes for Unicode mode -- done in this clumsy way for | ||||
|         # backwards compatibility because it's rather common to monkey-patch | ||||
|         # the TextWrapper class' wordsep_re attribute. | ||||
|         self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U) | ||||
|         self.wordsep_simple_re_uni = re.compile( | ||||
|             self.wordsep_simple_re.pattern, re.U) | ||||
| 
 | ||||
| 
 | ||||
|     # -- Private methods ----------------------------------------------- | ||||
|     # (possibly useful for subclasses to override) | ||||
|  | @ -160,12 +167,17 @@ def _split(self, text): | |||
|           'use', ' ', 'the', ' ', '-b', ' ', 'option!' | ||||
|         otherwise. | ||||
|         """ | ||||
|         flags = re.UNICODE if isinstance(text, unicode) else 0 | ||||
|         if self.break_on_hyphens: | ||||
|             pat = self.wordsep_re | ||||
|         if isinstance(text, unicode): | ||||
|             if self.break_on_hyphens: | ||||
|                 pat = self.wordsep_re_uni | ||||
|             else: | ||||
|                 pat = self.wordsep_simple_re_uni | ||||
|         else: | ||||
|             pat = self.wordsep_simple_re | ||||
|         chunks = re.compile(pat, flags).split(text) | ||||
|             if self.break_on_hyphens: | ||||
|                 pat = self.wordsep_re | ||||
|             else: | ||||
|                 pat = self.wordsep_simple_re | ||||
|         chunks = pat.split(text) | ||||
|         chunks = filter(None, chunks)  # remove empty chunks | ||||
|         return chunks | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Georg Brandl
						Georg Brandl