mirror of https://github.com/python/cpython.git
	Revert 83784 adding functools.lru_cache() to the re module.
The problem is that the re module is imported by sysconfig, and re needs functools, which uses collections.OrderedDict(); but the _collectionsmodule.c code has not yet been built at that point in the build. The likely best solution is to include _collections in the static build ahead of the rest of the bootstrapping.
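For context, a minimal sketch of the failure mode described above (not part of this commit): in CPython, collections unconditionally imports the _collections accelerator module, so nothing in the functools -> collections chain can be imported before _collectionsmodule.c is built. The sys.modules poisoning below is purely illustrative.

    # Illustrative only: simulate "_collections is not built yet" by poisoning
    # sys.modules -- a None entry makes any import of that name raise ImportError.
    import sys

    sys.modules['_collections'] = None
    for name in ('re', 'functools', 'collections'):
        sys.modules.pop(name, None)    # force the chain to be imported afresh

    try:
        import collections             # runs `from _collections import deque, ...`
    except ImportError as exc:
        # This is the error the build trips over once re (via functools)
        # needs collections before _collections exists.
        print('bootstrap failure reproduced:', exc)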
parent 4f859ed9c7
commit 31022301b5

3 changed files with 84 additions and 14 deletions
							
								
								
									
Lib/re.py | 32

--- a/Lib/re.py
+++ b/Lib/re.py
@@ -118,7 +118,6 @@
 import sys
 import sre_compile
 import sre_parse
-import functools
 
 # public symbols
 __all__ = [ "match", "search", "sub", "subn", "split", "findall",
@@ -206,9 +205,9 @@ def compile(pattern, flags=0):
     return _compile(pattern, flags)
 
 def purge():
-    "Clear the regular expression caches"
-    _compile_typed.clear()
-    _compile_repl.clear()
+    "Clear the regular expression cache"
+    _cache.clear()
+    _cache_repl.clear()
 
 def template(pattern, flags=0):
     "Compile a template pattern, returning a pattern object"
@@ -290,12 +289,12 @@ def _shrink_cache(cache_dict, max_length, divisor=5):
             # Ignore problems if the cache changed from another thread.
             pass
 
-def _compile(*args):
-    return _compile_typed(type(args[0]), *args)
-
-@functools.lru_cache(maxsize=_MAXCACHE)
-def _compile_typed(type, *key):
+def _compile(*key):
     # internal: compile pattern
+    cachekey = (type(key[0]),) + key
+    p = _cache.get(cachekey)
+    if p is not None:
+        return p
     pattern, flags = key
     if isinstance(pattern, _pattern_type):
         if flags:
@@ -304,14 +303,23 @@ def _compile_typed(type, *key):
         return pattern
     if not sre_compile.isstring(pattern):
         raise TypeError("first argument must be string or compiled pattern")
-    return sre_compile.compile(pattern, flags)
+    p = sre_compile.compile(pattern, flags)
+    if len(_cache) >= _MAXCACHE:
+        _shrink_cache(_cache, _MAXCACHE)
+    _cache[cachekey] = p
+    return p
 
-@functools.lru_cache(maxsize=_MAXCACHE)
 def _compile_repl(*key):
     # internal: compile replacement pattern
+    p = _cache_repl.get(key)
+    if p is not None:
+        return p
     repl, pattern = key
-    return sre_parse.parse_template(repl, pattern)
+    p = sre_parse.parse_template(repl, pattern)
+    if len(_cache_repl) >= _MAXCACHE:
+        _shrink_cache(_cache_repl, _MAXCACHE)
+    _cache_repl[key] = p
+    return p
 
 def _expand(pattern, match, template):
     # internal: match.expand implementation hook
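The diff above shows _shrink_cache() only as context (its signature and the thread-safety except clause). As a point of reference, here is a hedged reconstruction of what such a helper can look like; the eviction arithmetic is an assumption chosen to match the ReCacheTests expectations in the test diff below, not the committed body.

    import random

    def _shrink_cache(cache_dict, max_length, divisor=5):
        """Shrink cache_dict once it reaches max_length entries.

        Keeps max_length - max_length // divisor entries, i.e. with the
        default divisor=5 a full cache randomly loses 20% of its entries.
        """
        if len(cache_dict) < max_length:
            return                      # under the limit: nothing to do
        # Number of entries to toss so the cache ends up below max_length.
        overage = len(cache_dict) - (max_length - max_length // divisor)
        if overage <= 0:
            return
        # Random eviction spreads the recompilation cost across patterns
        # instead of always punishing the same hash-ordered victims.
        for key in random.sample(list(cache_dict), overage):
            try:
                del cache_dict[key]
            except KeyError:
                # Ignore problems if the cache changed from another thread.
                pass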
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -875,8 +875,70 @@ def run_re_tests():
                     print('=== Fails on unicode-sensitive match', t)
 
 
+class ReCacheTests(unittest.TestCase):
+    """These tests are specific to the re._shrink_cache implementation."""
+
+    def setUp(self):
+        self._orig_maxcache = re._MAXCACHE
+
+    def tearDown(self):
+        re._MAXCACHE = self._orig_maxcache
+
+    def test_compile_cache_overflow(self):
+        # NOTE: If a profiler or debugger is tracing code and compiling
+        # regular expressions while tracing through this test... expect
+        # the test to fail.  This test is not concurrency safe.
+
+        # Explicitly fill the caches.
+        re._MAXCACHE = 20
+        max_cache = re._MAXCACHE
+        unique_chars = tuple(chr(char_num) for char_num in
+                             range(b'a'[0], b'a'[0]+max_cache))
+        re._cache.clear()
+        for char in unique_chars:
+            re._compile(char, 0)
+        self.assertEqual(max_cache, len(re._cache))
+        re._cache_repl.clear()
+        for char in unique_chars:
+            re._compile_repl(char*2, char)
+        self.assertEqual(max_cache, len(re._cache_repl))
+
+        # Overflow both caches and make sure they have extra room left
+        # afterwards as well as having more than a single entry.
+        re._compile('A', 0)
+        self.assertLess(len(re._cache), max_cache)
+        self.assertGreater(len(re._cache), 1)
+        re._compile_repl('A', 'A')
+        self.assertLess(len(re._cache_repl), max_cache)
+        self.assertGreater(len(re._cache_repl), 1)
+
+    def test_shrink_cache_at_limit(self):
+        cache = dict(zip(range(6), range(6)))
+        re._shrink_cache(cache, 6, divisor=3)
+        self.assertEqual(4, len(cache))
+
+    def test_shrink_cache_empty(self):
+        cache = {}
+        re._shrink_cache(cache, 6, divisor=3)
+        # Cache was empty, make sure we didn't raise an exception.
+        self.assertEqual(0, len(cache))
+
+    def test_shrink_cache_overflowing(self):
+        cache = dict(zip(range(6), range(6)))
+        re._shrink_cache(cache, 4, divisor=2)
+        # Cache was larger than the maximum, be sure we shrunk to smaller.
+        self.assertEqual(2, len(cache))
+
+    def test_shrink_cache_underflow(self):
+        cache = dict(zip(range(6), range(6)))
+        # No shrinking to do.
+        re._shrink_cache(cache, 9, divisor=3)
+        self.assertEqual(6, len(cache))
+
+
 def test_main():
     run_unittest(ReTests)
+    run_unittest(ReCacheTests)
     run_re_tests()
 
 if __name__ == "__main__":
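A quick interactive check of the restored behaviour, assuming this patched re module (_cache and _MAXCACHE are internal names taken from the diff above, not public API):

    import re

    re.purge()                  # public API: empties _cache and _cache_repl
    re.compile('spam')          # _compile() memoizes the compiled pattern
    assert len(re._cache) == 1  # one entry keyed (str, 'spam', 0), per the diff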
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -697,8 +697,8 @@ Library
 
 - The default size of the re module's compiled regular expression cache has been
   increased from 100 to 500 and the cache replacement policy has changed from
-  simply clearing the entire cache on overflow to forgetting the least recently
-  used cached compiled regular expressions.  This is a performance win for
+  simply clearing the entire cache on overflow to randomly forgetting 20% of the
+  existing cached compiled regular expressions.  This is a performance win for
   applications that use a lot of regular expressions and limits the impact of
   the performance hit anytime the cache is exceeded.
 
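The 20% figure in the NEWS entry follows from the default divisor=5 in _shrink_cache's signature together with the new cache size; a quick worked check (the keep/evict split is the assumed formula from the sketch earlier, not text from this commit):

    _MAXCACHE = 500                 # new default size, per the entry above
    divisor = 5                     # default in _shrink_cache's signature
    evicted = _MAXCACHE // divisor  # 500 // 5 = 100 entries dropped
    kept = _MAXCACHE - evicted      # 400 entries, i.e. 80% survive
    print(evicted, kept)            # -> 100 400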
Raymond Hettinger