mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	GH-82805: Fix handling of single-dot file extensions in pathlib (#118952)
pathlib now treats "`.`" as a valid file extension (suffix). This brings it in line with `os.path.splitext()`. In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method that splits a path into a `(root, ext)` pair, like `os.path.splitext()`. This method is called by `PurePathBase.stem`, `suffix`, etc. In a future version of pathlib, we might make these base classes public, and so users will be able to define their own `splitext()` method to control file extension splitting. In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes` properties that don't use `splitext()`, which avoids computing the path base name twice.
This commit is contained in:
		
							parent
							
								
									0c5ebe13e9
								
							
						
					
					
						commit
						e418fc3a6e
					
				
					 5 changed files with 101 additions and 35 deletions
				
			
		|  | @ -449,6 +449,10 @@ Pure paths provide the following methods and properties: | |||
| 
 | ||||
|    This is commonly called the file extension. | ||||
| 
 | ||||
|    .. versionchanged:: 3.14 | ||||
| 
 | ||||
|       A single dot ("``.``") is considered a valid suffix. | ||||
| 
 | ||||
| .. attribute:: PurePath.suffixes | ||||
| 
 | ||||
|    A list of the path's suffixes, often called file extensions:: | ||||
|  | @ -460,6 +464,10 @@ Pure paths provide the following methods and properties: | |||
|       >>> PurePosixPath('my/library').suffixes | ||||
|       [] | ||||
| 
 | ||||
|    .. versionchanged:: 3.14 | ||||
| 
 | ||||
|       A single dot ("``.``") is considered a valid suffix. | ||||
| 
 | ||||
| 
 | ||||
| .. attribute:: PurePath.stem | ||||
| 
 | ||||
|  | @ -713,6 +721,11 @@ Pure paths provide the following methods and properties: | |||
|       >>> p.with_suffix('') | ||||
|       PureWindowsPath('README') | ||||
| 
 | ||||
|    .. versionchanged:: 3.14 | ||||
| 
 | ||||
|       A single dot ("``.``") is considered a valid suffix. In previous | ||||
|       versions, :exc:`ValueError` was raised if a single dot was supplied. | ||||
| 
 | ||||
| 
 | ||||
| .. method:: PurePath.with_segments(*pathsegments) | ||||
| 
 | ||||
|  |  | |||
|  | @ -68,6 +68,12 @@ def splitdrive(self, path): | |||
|         drive. Either part may be empty.""" | ||||
|         raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) | ||||
| 
 | ||||
|     def splitext(self, path): | ||||
|         """Split the path into a pair (root, ext), where *ext* is empty or | ||||
|         begins with a period and contains at most one period, | ||||
|         and *root* is everything before the extension.""" | ||||
|         raise UnsupportedOperation(self._unsupported_msg('splitext()')) | ||||
| 
 | ||||
|     def normcase(self, path): | ||||
|         """Normalize the case of the path.""" | ||||
|         raise UnsupportedOperation(self._unsupported_msg('normcase()')) | ||||
|  | @ -151,12 +157,7 @@ def suffix(self): | |||
| 
 | ||||
|         This includes the leading period. For example: '.txt' | ||||
|         """ | ||||
|         name = self.name | ||||
|         i = name.rfind('.') | ||||
|         if 0 < i < len(name) - 1: | ||||
|             return name[i:] | ||||
|         else: | ||||
|             return '' | ||||
|         return self.parser.splitext(self.name)[1] | ||||
| 
 | ||||
|     @property | ||||
|     def suffixes(self): | ||||
|  | @ -165,21 +166,18 @@ def suffixes(self): | |||
| 
 | ||||
|         These include the leading periods. For example: ['.tar', '.gz'] | ||||
|         """ | ||||
|         name = self.name | ||||
|         if name.endswith('.'): | ||||
|             return [] | ||||
|         name = name.lstrip('.') | ||||
|         return ['.' + suffix for suffix in name.split('.')[1:]] | ||||
|         split = self.parser.splitext | ||||
|         stem, suffix = split(self.name) | ||||
|         suffixes = [] | ||||
|         while suffix: | ||||
|             suffixes.append(suffix) | ||||
|             stem, suffix = split(stem) | ||||
|         return suffixes[::-1] | ||||
| 
 | ||||
|     @property | ||||
|     def stem(self): | ||||
|         """The final path component, minus its last suffix.""" | ||||
|         name = self.name | ||||
|         i = name.rfind('.') | ||||
|         if 0 < i < len(name) - 1: | ||||
|             return name[:i] | ||||
|         else: | ||||
|             return name | ||||
|         return self.parser.splitext(self.name)[0] | ||||
| 
 | ||||
|     def with_name(self, name): | ||||
|         """Return a new path with the file name changed.""" | ||||
|  | @ -208,7 +206,7 @@ def with_suffix(self, suffix): | |||
|         if not stem: | ||||
|             # If the stem is empty, we can't make the suffix non-empty. | ||||
|             raise ValueError(f"{self!r} has an empty name") | ||||
|         elif suffix and not (suffix.startswith('.') and len(suffix) > 1): | ||||
|         elif suffix and not suffix.startswith('.'): | ||||
|             raise ValueError(f"Invalid suffix {suffix!r}") | ||||
|         else: | ||||
|             return self.with_name(stem + suffix) | ||||
|  |  | |||
|  | @ -361,6 +361,40 @@ def with_name(self, name): | |||
|         tail[-1] = name | ||||
|         return self._from_parsed_parts(self.drive, self.root, tail) | ||||
| 
 | ||||
|     @property | ||||
|     def stem(self): | ||||
|         """The final path component, minus its last suffix.""" | ||||
|         name = self.name | ||||
|         i = name.rfind('.') | ||||
|         if i != -1: | ||||
|             stem = name[:i] | ||||
|             # Stem must contain at least one non-dot character. | ||||
|             if stem.lstrip('.'): | ||||
|                 return stem | ||||
|         return name | ||||
| 
 | ||||
|     @property | ||||
|     def suffix(self): | ||||
|         """ | ||||
|         The final component's last suffix, if any. | ||||
| 
 | ||||
|         This includes the leading period. For example: '.txt' | ||||
|         """ | ||||
|         name = self.name.lstrip('.') | ||||
|         i = name.rfind('.') | ||||
|         if i != -1: | ||||
|             return name[i:] | ||||
|         return '' | ||||
| 
 | ||||
|     @property | ||||
|     def suffixes(self): | ||||
|         """ | ||||
|         A list of the final component's suffixes, if any. | ||||
| 
 | ||||
|         These include the leading periods. For example: ['.tar', '.gz'] | ||||
|         """ | ||||
|         return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]] | ||||
| 
 | ||||
|     def relative_to(self, other, *, walk_up=False): | ||||
|         """Return the relative path to another path identified by the passed | ||||
|         arguments.  If the operation is not possible (because this is not | ||||
|  |  | |||
|  | @ -50,6 +50,7 @@ def test_unsupported_operation(self): | |||
|         self.assertRaises(e, m.join, 'foo') | ||||
|         self.assertRaises(e, m.split, 'foo') | ||||
|         self.assertRaises(e, m.splitdrive, 'foo') | ||||
|         self.assertRaises(e, m.splitext, 'foo') | ||||
|         self.assertRaises(e, m.normcase, 'foo') | ||||
|         self.assertRaises(e, m.isabs, 'foo') | ||||
| 
 | ||||
|  | @ -789,8 +790,12 @@ def test_suffix_common(self): | |||
|         self.assertEqual(P('/a/.hg.rc').suffix, '.rc') | ||||
|         self.assertEqual(P('a/b.tar.gz').suffix, '.gz') | ||||
|         self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') | ||||
|         self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') | ||||
|         self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') | ||||
|         self.assertEqual(P('a/trailing.dot.').suffix, '.') | ||||
|         self.assertEqual(P('/a/trailing.dot.').suffix, '.') | ||||
|         self.assertEqual(P('a/..d.o.t..').suffix, '.') | ||||
|         self.assertEqual(P('a/inn.er..dots').suffix, '.dots') | ||||
|         self.assertEqual(P('photo').suffix, '') | ||||
|         self.assertEqual(P('photo.jpg').suffix, '.jpg') | ||||
| 
 | ||||
|     @needs_windows | ||||
|     def test_suffix_windows(self): | ||||
|  | @ -807,8 +812,8 @@ def test_suffix_windows(self): | |||
|         self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') | ||||
|         self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') | ||||
|         self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') | ||||
|         self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') | ||||
|         self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') | ||||
|         self.assertEqual(P('c:a/trailing.dot.').suffix, '.') | ||||
|         self.assertEqual(P('c:/a/trailing.dot.').suffix, '.') | ||||
|         self.assertEqual(P('//My.py/Share.php').suffix, '') | ||||
|         self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') | ||||
| 
 | ||||
|  | @ -828,8 +833,12 @@ def test_suffixes_common(self): | |||
|         self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) | ||||
|         self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) | ||||
|         self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) | ||||
|         self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) | ||||
|         self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) | ||||
|         self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.']) | ||||
|         self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.']) | ||||
|         self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.']) | ||||
|         self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots']) | ||||
|         self.assertEqual(P('photo').suffixes, []) | ||||
|         self.assertEqual(P('photo.jpg').suffixes, ['.jpg']) | ||||
| 
 | ||||
|     @needs_windows | ||||
|     def test_suffixes_windows(self): | ||||
|  | @ -848,8 +857,8 @@ def test_suffixes_windows(self): | |||
|         self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) | ||||
|         self.assertEqual(P('//My.py/Share.php').suffixes, []) | ||||
|         self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) | ||||
|         self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) | ||||
|         self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) | ||||
|         self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.']) | ||||
|         self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.']) | ||||
| 
 | ||||
|     def test_stem_empty(self): | ||||
|         P = self.cls | ||||
|  | @ -865,8 +874,11 @@ def test_stem_common(self): | |||
|         self.assertEqual(P('a/.hgrc').stem, '.hgrc') | ||||
|         self.assertEqual(P('a/.hg.rc').stem, '.hg') | ||||
|         self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') | ||||
|         self.assertEqual(P('a/Some name. Ending with a dot.').stem, | ||||
|                          'Some name. Ending with a dot.') | ||||
|         self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot') | ||||
|         self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.') | ||||
|         self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.') | ||||
|         self.assertEqual(P('photo').stem, 'photo') | ||||
|         self.assertEqual(P('photo.jpg').stem, 'photo') | ||||
| 
 | ||||
|     @needs_windows | ||||
|     def test_stem_windows(self): | ||||
|  | @ -880,8 +892,8 @@ def test_stem_windows(self): | |||
|         self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') | ||||
|         self.assertEqual(P('c:a/.hg.rc').stem, '.hg') | ||||
|         self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') | ||||
|         self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, | ||||
|                          'Some name. Ending with a dot.') | ||||
|         self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot') | ||||
| 
 | ||||
|     def test_with_name_common(self): | ||||
|         P = self.cls | ||||
|         self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) | ||||
|  | @ -929,16 +941,16 @@ def test_with_stem_common(self): | |||
|         self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) | ||||
|         self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) | ||||
|         self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) | ||||
|         self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) | ||||
|         self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) | ||||
|         self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.')) | ||||
|         self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.')) | ||||
| 
 | ||||
|     @needs_windows | ||||
|     def test_with_stem_windows(self): | ||||
|         P = self.cls | ||||
|         self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) | ||||
|         self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) | ||||
|         self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) | ||||
|         self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) | ||||
|         self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.')) | ||||
|         self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.')) | ||||
|         self.assertRaises(ValueError, P('c:').with_stem, 'd') | ||||
|         self.assertRaises(ValueError, P('c:/').with_stem, 'd') | ||||
|         self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') | ||||
|  | @ -974,6 +986,11 @@ def test_with_suffix_common(self): | |||
|         # Stripping suffix. | ||||
|         self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) | ||||
|         self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) | ||||
|         # Single dot | ||||
|         self.assertEqual(P('a/b').with_suffix('.'), P('a/b.')) | ||||
|         self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.')) | ||||
|         self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.')) | ||||
|         self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.')) | ||||
| 
 | ||||
|     @needs_windows | ||||
|     def test_with_suffix_windows(self): | ||||
|  | @ -1012,7 +1029,6 @@ def test_with_suffix_invalid(self): | |||
|         # Invalid suffix. | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, '/') | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, '.') | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') | ||||
|         self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') | ||||
|  |  | |||
|  | @ -0,0 +1,5 @@ | |||
| Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and | ||||
| related attributes and methods. For example, the | ||||
| :attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now | ||||
| ``['.bar', '.']`` rather than ``[]``. This brings file extension splitting | ||||
| in line with :func:`os.path.splitext`. | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Barney Gale
						Barney Gale