mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 02:43:41 +00:00 
			
		
		
		
	 e83ce850f4
			
		
	
	
		e83ce850f4
		
			
		
	
	
	
	
		
			
			Add private `posixpath._realpath()` function, which is a generic version of `realpath()` that can be parameterised with string tokens (`sep`, `curdir`, `pardir`) and query functions (`getcwd`, `lstat`, `readlink`). Also add support for limiting the number of symlink traversals. In the private `pathlib._abc.PathBase` class, call `posixpath._realpath()` and remove our re-implementation of the same algorithm. No change to any public APIs, either in `posixpath` or `pathlib`. Co-authored-by: Nice Zombies <nineteendo19d0@gmail.com>
		
			
				
	
	
		
			576 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			576 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Common operations on Posix pathnames.
 | |
| 
 | |
| Instead of importing this module directly, import os and refer to
 | |
| this module as os.path.  The "os.path" name is an alias for this
 | |
| module on Posix systems; on other systems (e.g. Windows),
 | |
| os.path provides the same operations in a manner specific to that
 | |
| platform, and is an alias to another module (e.g. ntpath).
 | |
| 
 | |
| Some of this can actually be useful on non-Posix systems too, e.g.
 | |
| for manipulation of the pathname component of URLs.
 | |
| """
 | |
| 
 | |
# Strings representing various path-related bits and pieces.
# These are primarily for export; internally, they are hardcoded.
# Should be set before imports for resolving cyclic dependency.
curdir = '.'    # path component meaning "current directory"
pardir = '..'   # path component meaning "parent directory"
extsep = '.'    # separates a file name from its extension (see splitext)
sep = '/'       # separator between path components
pathsep = ':'   # NOTE(review): presumably the separator used in search-path lists such as defpath
defpath = '/bin:/usr/bin'
altsep = None   # no alternative separator is defined in this module
devnull = '/dev/null'
 | |
| 
 | |
| import errno
 | |
| import os
 | |
| import sys
 | |
| import stat
 | |
| import genericpath
 | |
| from genericpath import *
 | |
| 
 | |
# Names exported by "from posixpath import *".  Entries that are not defined
# in this file (e.g. commonprefix, getsize, islink) presumably come from the
# "from genericpath import *" above -- verify against genericpath.
__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath", "isjunction","isdevdrive"]
 | |
| 
 | |
| 
 | |
| def _get_sep(path):
 | |
|     if isinstance(path, bytes):
 | |
|         return b'/'
 | |
|     else:
 | |
|         return '/'
 | |
| 
 | |
| # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
 | |
| # On MS-DOS this may also turn slashes into backslashes; however, other
 | |
| # normalizations (such as optimizing '../' away) are not allowed
 | |
| # (another function should be defined to do that).
 | |
| 
 | |
def normcase(s):
    """Normalize the case of a pathname.

    Case normalization has no effect under Posix, so the only work done
    here is the os.fspath() conversion of the argument.
    """
    pathname = os.fspath(s)
    return pathname
 | |
| 
 | |
| 
 | |
| # Return whether a path is absolute.
 | |
| # Trivial in Posix, harder on the Mac or MS-DOS.
 | |
| 
 | |
def isabs(s):
    """Return True if the pathname is absolute, i.e. starts with a separator."""
    pathname = os.fspath(s)
    return pathname.startswith(_get_sep(pathname))
 | |
| 
 | |
| 
 | |
| # Join pathnames.
 | |
| # Ignore the previous parts if a part is absolute.
 | |
| # Insert a '/' unless the first part is empty or already ends in '/'.
 | |
| 
 | |
def join(a, *p):
    """Concatenate pathname components, adding '/' between them when needed.

    An absolute component discards everything accumulated before it.  If
    the last component is empty, the result ends with a separator.
    """
    a = os.fspath(a)
    slash = _get_sep(a)
    joined = a
    try:
        for part in p:
            part = os.fspath(part)
            # The startswith() test must come first: it is what raises
            # TypeError when str and bytes components are mixed.
            if part.startswith(slash) or not joined:
                # Absolute component (or nothing accumulated yet): restart.
                joined = part
                continue
            if not joined.endswith(slash):
                joined += slash
            joined += part
    except (TypeError, AttributeError, BytesWarning):
        # Give a more helpful message for bad argument types if possible.
        genericpath._check_arg_types('join', a, *p)
        raise
    return joined
 | |
| 
 | |
| 
 | |
| # Split a path in head (everything up to the last '/') and tail (the
 | |
| # rest).  If the path ends in '/', tail will be empty.  If there is no
 | |
| # '/' in the path, head  will be empty.
 | |
| # Trailing '/'es are stripped from head unless it is the root.
 | |
| 
 | |
def split(p):
    """Split a pathname into the tuple "(head, tail)".

    "tail" is everything after the final slash and "head" is everything
    before it, with trailing slashes removed unless head consists only of
    slashes.  Either part may be empty.
    """
    p = os.fspath(p)
    slash = _get_sep(p)
    cut = p.rfind(slash) + 1
    head = p[:cut]
    tail = p[cut:]
    # rstrip() yields an empty value only when head is empty or made up
    # entirely of slashes; in both cases head is kept as it is.
    head = head.rstrip(slash) or head
    return head, tail
 | |
| 
 | |
| 
 | |
| # Split a path in root and extension.
 | |
| # The extension is everything starting at the last dot in the last
 | |
| # pathname component; the root is everything before that.
 | |
| # It is always true that root + ext == p.
 | |
| 
 | |
def splitext(p):
    # The docstring is copied from genericpath._splitext just below.
    p = os.fspath(p)
    if isinstance(p, bytes):
        return genericpath._splitext(p, b'/', None, b'.')
    return genericpath._splitext(p, '/', None, '.')
splitext.__doc__ = genericpath._splitext.__doc__
 | |
| 
 | |
| # Split a pathname into a drive specification and the rest of the
 | |
| # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
 | |
| 
 | |
def splitdrive(p):
    """Split a pathname into (drive, path).

    On Posix the drive is always empty; it is returned as an empty value
    of the same type as the path.
    """
    rest = os.fspath(p)
    drive = rest[:0]
    return drive, rest
 | |
| 
 | |
| 
 | |
try:
    from posix import _path_splitroot_ex as splitroot
except ImportError:
    def splitroot(p):
        """Split a pathname into the tuple (drive, root, tail).

        The drive is always empty here; the tail is whatever follows the
        root."""
        p = os.fspath(p)
        slash, nothing = (b'/', b'') if isinstance(p, bytes) else ('/', '')
        if p[:1] != slash:
            # Relative path, e.g.: 'foo'
            return nothing, nothing, p
        if p[1:2] == slash and p[2:3] != slash:
            # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
            # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
            return nothing, p[:2], p[2:]
        # Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
        return nothing, slash, p[1:]
 | |
| 
 | |
| 
 | |
| # Return the tail (basename) part of a path, same as split(path)[1].
 | |
| 
 | |
def basename(p):
    """Return the final component of a pathname (everything after the last slash)."""
    p = os.fspath(p)
    start = p.rfind(_get_sep(p)) + 1
    return p[start:]
 | |
| 
 | |
| 
 | |
| # Return the head (dirname) part of a path, same as split(path)[0].
 | |
| 
 | |
def dirname(p):
    """Return the directory component of a pathname.

    Trailing slashes are removed from the result unless it consists only
    of slashes.
    """
    p = os.fspath(p)
    slash = _get_sep(p)
    head = p[:p.rfind(slash) + 1]
    # rstrip() yields an empty value only when head is empty or made up
    # entirely of slashes; in both cases head is kept as it is.
    return head.rstrip(slash) or head
 | |
| 
 | |
| 
 | |
| # Is a path a mount point?
 | |
| # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
 | |
| 
 | |
def ismount(path):
    """Return True if *path* is a mount point, False otherwise.

    A path that cannot be lstat()ed, or that is a symlink, is never a
    mount point.  Otherwise the path is compared with its parent
    directory: a different device, or the same i-node, means mount point.
    """
    try:
        own_stat = os.lstat(path)
    except (OSError, ValueError):
        # It doesn't exist -- so not a mount point. :-)
        return False
    # A symlink can never be a mount point
    if stat.S_ISLNK(own_stat.st_mode):
        return False

    path = os.fspath(path)
    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = join(path, dotdot)
    try:
        parent_stat = os.lstat(parent)
    except OSError:
        # Retry once using the canonical form of the parent path.
        parent = realpath(parent)
        try:
            parent_stat = os.lstat(parent)
        except OSError:
            return False

    # path/.. on a different device as path ...
    if own_stat.st_dev != parent_stat.st_dev:
        return True
    # ... or the same i-node as path
    return own_stat.st_ino == parent_stat.st_ino
 | |
| 
 | |
| 
 | |
| # Expand paths beginning with '~' or '~user'.
 | |
| # '~' means $HOME; '~user' means that user's home directory.
 | |
| # If the path doesn't begin with '~', or if the user or $HOME is unknown,
 | |
| # the path is returned unchanged (leaving error reporting to whatever
 | |
| # function is called with the expanded path as argument).
 | |
| # See also module 'glob' for expansion of *, ? and [...] in pathnames.
 | |
| # (A function should also be defined to do full *sh-style environment
 | |
| # variable expansion.)
 | |
| 
 | |
def expanduser(path):
    """Expand a leading ~ or ~user into that user's home directory.

    The path is returned unchanged when it does not start with '~', when
    the pwd module is needed but unavailable, or when the user cannot be
    found in the password database.
    """
    path = os.fspath(path)
    is_bytes = isinstance(path, bytes)
    tilde = b'~' if is_bytes else '~'
    if not path.startswith(tilde):
        return path
    sep = _get_sep(path)
    # 'end' is the index just past the '~' or '~user' prefix.
    end = path.find(sep, 1)
    if end < 0:
        end = len(path)
    if end != 1:
        # '~user' form: look the named user up in the password database.
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        name = path[1:end]
        if is_bytes:
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    elif 'HOME' in os.environ:
        # Bare '~' and $HOME is set: use it directly.
        userhome = os.environ['HOME']
    else:
        # Bare '~' without $HOME: fall back to the current user's entry.
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        try:
            userhome = pwd.getpwuid(os.getuid()).pw_dir
        except KeyError:
            # bpo-10496: if the current user identifier doesn't exist in the
            # password database, return the path unchanged
            return path
    # if no user home, return the path unchanged on VxWorks
    if userhome is None and sys.platform == "vxworks":
        return path
    if is_bytes:
        userhome = os.fsencode(userhome)
    userhome = userhome.rstrip(sep)
    # An empty result (home was only slashes and nothing follows) becomes
    # the root.
    return (userhome + path[end:]) or sep
 | |
| 
 | |
| 
 | |
| # Expand paths containing shell variable substitutions.
 | |
| # This expands the forms $variable and ${variable} only.
 | |
| # Non-existent variables are left unchanged.
 | |
| 
 | |
# Lazily compiled patterns for str and bytes paths respectively.
_varprog = None
_varprogb = None

def expandvars(path):
    """Substitute $var and ${var} references with environment values.

    References to variables that are not set are left in place.
    """
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        search = _varprogb.search
        start, end = b'{', b'}'
        # os.environb may be missing; None makes the loop below fall back
        # to os.environ with explicit decoding and encoding.
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        search = _varprog.search
        start, end = '{', '}'
        environ = os.environ
    pos = 0
    match = search(path, pos)
    while match:
        begin, stop = match.span(0)
        name = match.group(1)
        if name.startswith(start) and name.endswith(end):
            # Strip the braces of the ${var} form.
            name = name[1:-1]
        try:
            if environ is None:
                value = os.fsencode(os.environ[os.fsdecode(name)])
            else:
                value = environ[name]
        except KeyError:
            # Unknown variable: leave it alone and search after it.
            pos = stop
        else:
            expanded = path[:begin] + value
            # Resume after the inserted value so it is not expanded again.
            pos = len(expanded)
            path = expanded + path[stop:]
        match = search(path, pos)
    return path
 | |
| 
 | |
| 
 | |
| # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
 | |
| # It should be understood that this may change the meaning of the path
 | |
| # if it contains symbolic links!
 | |
| 
 | |
| try:
 | |
|     from posix import _path_normpath as normpath
 | |
| 
 | |
| except ImportError:
 | |
|     def normpath(path):
 | |
|         """Normalize path, eliminating double slashes, etc."""
 | |
|         path = os.fspath(path)
 | |
|         if isinstance(path, bytes):
 | |
|             sep = b'/'
 | |
|             dot = b'.'
 | |
|             dotdot = b'..'
 | |
|         else:
 | |
|             sep = '/'
 | |
|             dot = '.'
 | |
|             dotdot = '..'
 | |
|         if not path:
 | |
|             return dot
 | |
|         _, initial_slashes, path = splitroot(path)
 | |
|         comps = path.split(sep)
 | |
|         new_comps = []
 | |
|         for comp in comps:
 | |
|             if not comp or comp == dot:
 | |
|                 continue
 | |
|             if (comp != dotdot or (not initial_slashes and not new_comps) or
 | |
|                  (new_comps and new_comps[-1] == dotdot)):
 | |
|                 new_comps.append(comp)
 | |
|             elif new_comps:
 | |
|                 new_comps.pop()
 | |
|         comps = new_comps
 | |
|         path = initial_slashes + sep.join(comps)
 | |
|         return path or dot
 | |
| 
 | |
| 
 | |
def abspath(path):
    """Return a normalized, absolute version of *path*.

    A path that does not start with '/' is joined onto the current working
    directory before being normalized.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        root, getcwd = b'/', os.getcwdb
    else:
        root, getcwd = '/', os.getcwd
    if not path.startswith(root):
        path = join(getcwd(), path)
    return normpath(path)
 | |
| 
 | |
| 
 | |
| # Return a canonical path (i.e. the absolute location of a file on the
 | |
| # filesystem).
 | |
| 
 | |
def realpath(filename, *, strict=False):
    """Return the canonical path of *filename* with symbolic links eliminated.

    The work is delegated to _realpath(); this wrapper only selects the
    bytes or str flavour of the path tokens and of getcwd.
    """
    filename = os.fspath(filename)
    if isinstance(filename, bytes):
        tokens = (b'/', b'.', b'..', os.getcwdb)
    else:
        tokens = ('/', '.', '..', os.getcwd)
    # tokens are, in order: sep, curdir, pardir, getcwd
    return _realpath(filename, strict, *tokens)
 | |
| 
 | |
def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir,
              getcwd=os.getcwd, lstat=os.lstat, readlink=os.readlink, maxlinks=None):
    """Resolve *filename* into a path with symbolic links eliminated.

    Generic worker behind realpath(): the string tokens (*sep*, *curdir*,
    *pardir*) and the query functions (*getcwd*, *lstat*, *readlink*) are
    all supplied by the caller.

    If *strict* is true, an OSError raised by *lstat* or *readlink* is
    propagated, and a symlink loop raises OSError with errno ELOOP.
    Otherwise the offending component is appended unresolved and
    processing continues with the remaining components.

    If *maxlinks* is None, loops are detected by remembering every symlink
    seen.  Otherwise traversals are counted, and following more than
    *maxlinks* symlinks is treated as a loop.
    """
    # The stack of unresolved path parts. When popped, a special value of None
    # indicates that a symlink target has been resolved, and that the original
    # symlink path can be retrieved by popping again. The [::-1] slice is a
    # very fast way of spelling list(reversed(...)).
    rest = filename.split(sep)[::-1]

    # The resolved path, which is absolute throughout this function.
    # Note: getcwd() returns a normalized and symlink-free path.
    path = sep if filename.startswith(sep) else getcwd()

    # Mapping from symlink paths to *fully resolved* symlink targets. If a
    # symlink is encountered but not yet resolved, the value is None. This is
    # used both to detect symlink loops and to speed up repeated traversals of
    # the same links.
    seen = {}

    # Number of symlinks traversed. When the number of traversals is limited
    # by *maxlinks*, this is used instead of *seen* to detect symlink loops.
    link_count = 0

    while rest:
        name = rest.pop()
        if name is None:
            # resolved symlink target
            seen[rest.pop()] = path
            continue
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            # Drop the last component; an empty result means we are at the root.
            path = path[:path.rindex(sep)] or sep
            continue
        # Avoid producing a doubled separator when appending to the root.
        if path == sep:
            newpath = path + name
        else:
            newpath = path + sep + name
        try:
            st = lstat(newpath)
            if not stat.S_ISLNK(st.st_mode):
                # Not a symlink: accept the component as it is.
                path = newpath
                continue
            elif maxlinks is not None:
                link_count += 1
                if link_count > maxlinks:
                    # This OSError is raised inside the try block, so it is
                    # caught by the handler below and re-raised there
                    # (strict is known to be true at this point).
                    if strict:
                        raise OSError(errno.ELOOP, os.strerror(errno.ELOOP),
                                      newpath)
                    path = newpath
                    continue
            elif newpath in seen:
                # Already seen this path
                path = seen[newpath]
                if path is not None:
                    # use cached value
                    continue
                # The symlink is not resolved, so we must have a symlink loop.
                if strict:
                    raise OSError(errno.ELOOP, os.strerror(errno.ELOOP),
                                  newpath)
                path = newpath
                continue
            target = readlink(newpath)
        except OSError:
            if strict:
                raise
            # Non-strict mode: keep the component unresolved and move on.
            path = newpath
            continue
        # Resolve the symbolic link
        if target.startswith(sep):
            # Symlink target is absolute; reset resolved path.
            path = sep
        if maxlinks is None:
            # Mark this symlink as seen but not fully resolved.
            seen[newpath] = None
            # Push the symlink path onto the stack, and signal its specialness
            # by also pushing None. When these entries are popped, we'll
            # record the fully-resolved symlink target in the 'seen' mapping.
            rest.append(newpath)
            rest.append(None)
        # Push the unresolved symlink target parts onto the stack.
        rest.extend(target.split(sep)[::-1])

    return path
 | |
| 
 | |
| 
 | |
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
 | |
| 
 | |
def relpath(path, start=None):
    """Return *path* expressed relative to *start*.

    *start* defaults to the current directory.  Both paths are made
    absolute first.  Raises ValueError if *path* is empty.
    """
    path = os.fspath(path)
    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir, sep, pardir = b'.', b'/', b'..'
    else:
        curdir, sep, pardir = '.', '/', '..'

    start = curdir if start is None else os.fspath(start)

    try:
        start_tail = abspath(start).lstrip(sep)
        path_tail = abspath(path).lstrip(sep)
        # An empty tail (the root itself) must give [], not [''].
        start_parts = start_tail.split(sep) if start_tail else []
        path_parts = path_tail.split(sep) if path_tail else []
        # Work out how much of the filepath is shared by start and path.
        shared = len(commonprefix([start_parts, path_parts]))

        # Climb out of the unshared part of start, then descend into path.
        climb = [pardir] * (len(start_parts) - shared)
        rel_parts = climb + path_parts[shared:]
        return sep.join(rel_parts) if rel_parts else curdir
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Give a more helpful message for bad argument types if possible.
        genericpath._check_arg_types('relpath', path, start)
        raise
 | |
| 
 | |
| 
 | |
| # Return the longest common sub-path of the sequence of paths given as input.
 | |
| # The paths are not normalized before comparing them (this is the
 | |
| # responsibility of the caller). Any trailing separator is stripped from the
 | |
| # returned path.
 | |
| 
 | |
def commonpath(paths):
    """Return the longest common sub-path of a sequence of path names.

    Raises ValueError if the sequence is empty or if it mixes absolute and
    relative paths.  Empty and '.' components are ignored when comparing.
    """
    paths = tuple(map(os.fspath, paths))

    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep, curdir = b'/', b'.'
    else:
        sep, curdir = '/', '.'

    try:
        raw_parts = [path.split(sep) for path in paths]

        # Every path must agree on whether it is absolute.
        flavours = {p.startswith(sep) for p in paths}
        if len(flavours) != 1:
            raise ValueError("Can't mix absolute and relative paths") from None
        absolute = flavours.pop()

        cleaned = [[c for c in parts if c and c != curdir]
                   for parts in raw_parts]
        # The common prefix of the smallest and largest lists (in list
        # ordering) is the common prefix of all of them.
        lowest = min(cleaned)
        highest = max(cleaned)
        shared = 0
        for low, high in zip(lowest, highest):
            if low != high:
                break
            shared += 1

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(lowest[:shared])
    except (TypeError, AttributeError):
        # Give a more helpful message for bad argument types if possible.
        genericpath._check_arg_types('commonpath', *paths)
        raise
 |