mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 10:44:55 +00:00 
			
		
		
		
	 b9c46a2c2d
			
		
	
	
		b9c46a2c2d
		
			
		
	
	
	
	
		
			
			bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time.
		
			
				
	
	
		
			174 lines
		
	
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			174 lines
		
	
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Filename matching with shell patterns.
 | |
| 
 | |
| fnmatch(FILENAME, PATTERN) matches according to the local convention.
 | |
| fnmatchcase(FILENAME, PATTERN) always takes case into account.
 | |
| 
 | |
| The functions operate by translating the pattern into a regular
 | |
| expression.  They cache the compiled regular expressions for speed.
 | |
| 
 | |
| The function translate(PATTERN) returns a regular expression
 | |
| corresponding to PATTERN.  (It does not compile it.)
 | |
| """
 | |
| import os
 | |
| import posixpath
 | |
| import re
 | |
| import functools
 | |
| 
 | |
| __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 | |
| 
 | |
def fnmatch(name, pat):
    """Return whether FILENAME matches the shell-style PATTERN.

    Supported pattern syntax:

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    A leading period in FILENAME gets no special treatment.
    Both arguments are passed through os.path.normcase() before
    matching, so the comparison follows the platform's case rules.
    Call fnmatchcase(FILENAME, PATTERN) to skip that normalization.
    """
    # Normalize the name first, then the pattern, and delegate the
    # actual matching to the case-sensitive implementation.
    return fnmatchcase(os.path.normcase(name), os.path.normcase(pat))
 | |
| 
 | |
@functools.lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regex compiled from PAT.

    PAT may be str or bytes.  Results are memoized by the surrounding
    lru_cache (typed, so equal str and bytes keys are kept apart).
    """
    if not isinstance(pat, bytes):
        regex = translate(pat)
    else:
        # translate() works on str: round-trip the bytes pattern through
        # ISO-8859-1 and turn the resulting regex back into bytes.
        as_text = str(pat, 'ISO-8859-1')
        regex = bytes(translate(as_text), 'ISO-8859-1')
    compiled = re.compile(regex)
    return compiled.match
 | |
| 
 | |
def filter(names, pat):
    """Return a new list of the entries of NAMES that match PAT.

    PAT is case-normalized with os.path.normcase().  Each name is
    normalized the same way for the comparison, but the original,
    un-normalized name is what ends up in the returned list.
    """
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is not posixpath:
        return [entry for entry in names
                if matcher(os.path.normcase(entry))]
    # normcase() is a no-op on posix, so skip the call for every name.
    return [entry for entry in names if matcher(entry)]
 | |
| 
 | |
def fnmatchcase(name, pat):
    """Return whether FILENAME matches PATTERN, comparing case exactly.

    Unlike fnmatch(), neither argument is case-normalized before the
    pattern is applied.
    """
    # The compiled matcher returns a match object or None; expose a bool.
    return _compile_pattern(pat)(name) is not None
 | |
| 
 | |
| 
 | |
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    PAT is a str.  The return value is the regular expression source as
    a str (it is not compiled): the translated pattern wrapped in a
    DOTALL-scoped group and anchored at the end of the string.

    Character ranges inside brackets whose lower bound is greater than
    their upper bound (e.g. ``[c-a]``) are treated as empty ranges rather
    than being passed through to the regex, where they would make
    re.compile() raise re.error.
    """

    # Unique sentinel marking where a (possibly repeated) `*` occurred.
    STAR = object()
    res = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add('.')
        elif c == '[':
            # Find the closing `]`.  A `!` right after `[` and a `]` in
            # first position are part of the set, not its terminator.
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # No closing bracket: the `[` is a literal character.
                add('\\[')
            else:
                stuff = pat[i:j]
                if '-' not in stuff:
                    stuff = stuff.replace('\\', r'\\')
                else:
                    # Split the set at every hyphen that acts as a range
                    # operator.  The first character of the set (after an
                    # optional `!`) can never be such a hyphen.
                    chunks = []
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k+1
                        # Skip the range's end char and the next start char.
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # A trailing hyphen is a literal, not a range.
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    for idx in range(len(chunks)-1, 0, -1):
                        if chunks[idx-1][-1] > chunks[idx][0]:
                            chunks[idx-1] = chunks[idx-1][:-1] + chunks[idx][1:]
                            del chunks[idx]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    assert i == n

    # Deal with STARs.
    inp = res
    res = []
    add = res.append
    i, n = 0, len(inp)
    # Fixed pieces at the start?
    while i < n and inp[i] is not STAR:
        add(inp[i])
        i += 1
    # Now deal with STAR fixed STAR fixed ...
    # For an interior `STAR fixed` pairing, we want to do a minimal
    # .*? match followed by `fixed`, with no possibility of backtracking.
    # We can't spell that directly, but can trick it into working by matching
    #    .*?fixed
    # in a lookahead assertion, save the matched part in a group, then
    # consume that group via a backreference. If the overall match fails,
    # the lookahead assertion won't try alternatives. So the translation is:
    #     (?=(?P<name>.*?fixed))(?P=name)
    # Group names are created as needed: g1, g2, g3, ...
    groupnum = 0
    while i < n:
        assert inp[i] is STAR
        i += 1
        if i == n:
            # Trailing STAR: match whatever remains.
            add(".*")
            break
        assert inp[i] is not STAR
        fixed = []
        while i < n and inp[i] is not STAR:
            fixed.append(inp[i])
            i += 1
        fixed = "".join(fixed)
        if i == n:
            # Last `STAR fixed` pair: a plain greedy match is enough.
            add(".*")
            add(fixed)
        else:
            groupnum += 1
            add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
    assert i == n
    res = "".join(res)
    return fr'(?s:{res})\Z'
 |