bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. (#29040)

The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for frozen modules, this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c.

https://bugs.python.org/issue45506
This commit is contained in:
Eric Snow 2021-10-22 17:20:03 -06:00 committed by GitHub
parent f30ad65dbf
commit 17c61045c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 236 additions and 24 deletions

View file

@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile, const wchar_t *relfile,
size_t bufsize); size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename); extern size_t _Py_find_basename(const wchar_t *filename);
// Write a normalized copy of the absolute path "path" into "buf", which
// holds "buf_len" wide characters: "." and ".." components and repeated
// separators are removed.  Return 0 on success, or -1 if "path" is too
// long for the buffer.
PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
wchar_t *buf, const size_t buf_len);
// Macros to protect CRT calls against instant termination when passed an // Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.

View file

@ -0,0 +1,30 @@
# Run tests for functions in Python/fileutils.c.
import os
import os.path
import unittest
from test.support import import_helper
# Skip this test if the _testinternalcapi module isn't available.
_testcapi = import_helper.import_module('_testinternalcapi')
class PathTests(unittest.TestCase):
    """Checks the C path helpers against the posixpath reference cases."""

    def test_capi_normalize_path(self):
        # The C helper is only exercised on POSIX-style platforms.
        if os.name == 'nt':
            raise unittest.SkipTest('Windows has its own helper for this')

        from .test_posixpath import PosixPathTest

        # Only absolute inputs are fed to the C helper; relative cases
        # from the shared table are skipped.
        absolute_cases = [
            (raw, wanted)
            for raw, wanted in PosixPathTest.NORMPATH_CASES
            if os.path.isabs(raw)
        ]
        for raw, wanted in absolute_cases:
            with self.subTest(raw):
                actual = _testcapi.normalize_path(raw)
                self.assertEqual(
                    actual, wanted,
                    msg=f'input: {raw!r} expected output: {wanted!r}')
if __name__ == "__main__":
unittest.main()

View file

@ -304,25 +304,51 @@ def test_expanduser_pwd(self):
for path in ('~', '~/.local', '~vstinner/'): for path in ('~', '~/.local', '~vstinner/'):
self.assertEqual(posixpath.expanduser(path), path) self.assertEqual(posixpath.expanduser(path), path)
def test_normpath(self): NORMPATH_CASES = [
self.assertEqual(posixpath.normpath(""), ".") ("", "."),
self.assertEqual(posixpath.normpath("/"), "/") ("/", "/"),
self.assertEqual(posixpath.normpath("//"), "//") ("/.", "/"),
self.assertEqual(posixpath.normpath("///"), "/") ("/./", "/"),
self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar") ("/.//.", "/"),
self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"), ("/foo", "/foo"),
"/foo/baz") ("/foo/bar", "/foo/bar"),
self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar") ("//", "//"),
("///", "/"),
("///foo/.//bar//", "/foo/bar"),
("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
("///..//./foo/.//bar", "/foo/bar"),
(".", "."),
(".//.", "."),
("..", ".."),
("../", ".."),
("../foo", "../foo"),
("../../foo", "../../foo"),
("../foo/../bar", "../bar"),
("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
("/..", "/"),
("/..", "/"),
("/../", "/"),
("/..//", "/"),
("//..", "//"),
("/../foo", "/foo"),
("/../../foo", "/foo"),
("/../foo/../", "/"),
("/../foo/../bar", "/bar"),
("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
]
self.assertEqual(posixpath.normpath(b""), b".") def test_normpath(self):
self.assertEqual(posixpath.normpath(b"/"), b"/") for path, expected in self.NORMPATH_CASES:
self.assertEqual(posixpath.normpath(b"//"), b"//") with self.subTest(path):
self.assertEqual(posixpath.normpath(b"///"), b"/") result = posixpath.normpath(path)
self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar") self.assertEqual(result, expected)
self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
b"/foo/baz") path = path.encode('utf-8')
self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"), expected = expected.encode('utf-8')
b"/foo/bar") with self.subTest(path, type=bytes):
result = posixpath.normpath(path)
self.assertEqual(result, expected)
@skip_if_ABSTFN_contains_backslash @skip_if_ABSTFN_contains_backslash
def test_realpath_curdir(self): def test_realpath_curdir(self):

View file

@ -14,12 +14,14 @@
#include "Python.h" #include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_fileutils.h" // _Py_normalize_path
#include "pycore_gc.h" // PyGC_Head #include "pycore_gc.h" // PyGC_Head
#include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict() #include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy() #include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost() #include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost()
#include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_pystate.h" // _PyThreadState_GET()
#include "osdefs.h" // MAXPATHLEN
static PyObject * static PyObject *
@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
} }
/* Test helper exposing _Py_normalize_path() to Python.

   "filename" is converted to a wide-character string, normalized into a
   stack buffer of MAXPATHLEN + 1 wide characters, and returned as a new
   str object.  Returns NULL with an exception set if the conversion fails
   or if _Py_normalize_path() reports that the path does not fit.

   _Py_normalize_path() asserts that its input is a non-empty absolute
   path, so callers are expected to pass only such paths. */
static PyObject *
normalize_path(PyObject *self, PyObject *filename)
{
    // Passing NULL for the size makes the conversion raise ValueError when
    // the string contains an embedded null character, instead of letting
    // the path be silently truncated at that character.
    wchar_t *encoded = PyUnicode_AsWideCharString(filename, NULL);
    if (encoded == NULL) {
        return NULL;
    }

    wchar_t buf[MAXPATHLEN + 1];
    int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
    // The normalized copy lives in buf; the encoded input is done with.
    PyMem_Free(encoded);
    if (res != 0) {
        PyErr_SetString(PyExc_ValueError, "string too long");
        return NULL;
    }

    return PyUnicode_FromWideChar(buf, -1);
}
static PyMethodDef TestMethods[] = { static PyMethodDef TestMethods[] = {
{"get_configs", get_configs, METH_NOARGS}, {"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = {
{"set_config", test_set_config, METH_O}, {"set_config", test_set_config, METH_O},
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
{"test_edit_cost", test_edit_cost, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS},
{"normalize_path", normalize_path, METH_O, NULL},
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };

View file

@ -519,6 +519,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig,
} }
/* Set pathconfig->stdlib_dir to an absolute, normalized copy of
   calculate->prefix.

   Nothing is set (and OK is returned) when calculate->prefix_found is 0.
   Returns a no-memory status if an allocation fails, the status from
   absolutize() if that fails, and PATHLEN_ERR() if the prefix does not
   fit in the normalization buffer. */
static PyStatus
calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
    // Note that, unlike calculate_set_prefix(), here we allow a negative
    // prefix_found.  That means the source tree Lib dir gets used.
    if (!calculate->prefix_found) {
        return _PyStatus_OK();
    }

    // "prefix" aliases calculate->prefix unless it had to be made absolute,
    // in which case it is a private heap copy that must be released here.
    wchar_t *prefix = calculate->prefix;
    if (!_Py_isabs(prefix)) {
        prefix = _PyMem_RawWcsdup(prefix);
        if (prefix == NULL) {
            return _PyStatus_NO_MEMORY();
        }
        PyStatus status = absolutize(&prefix);
        if (_PyStatus_EXCEPTION(status)) {
            // Do not leak the private copy on failure.
            // NOTE(review): assumes absolutize() leaves "prefix" either
            // still owned by us or set to NULL when it fails -- confirm
            // against its definition.
            PyMem_RawFree(prefix);
            return status;
        }
    }

    wchar_t buf[MAXPATHLEN + 1];
    int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
    if (prefix != calculate->prefix) {
        PyMem_RawFree(prefix);
    }
    if (res < 0) {
        return PATHLEN_ERR();
    }

    pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
    if (pathconfig->stdlib_dir == NULL) {
        return _PyStatus_NO_MEMORY();
    }
    return _PyStatus_OK();
}
static PyStatus static PyStatus
calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig) calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{ {
@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
} }
if (pathconfig->stdlib_dir == NULL) { if (pathconfig->stdlib_dir == NULL) {
if (calculate->prefix_found) { /* This must be done *before* calculate_set_prefix() is called. */
/* This must be done *before* calculate_set_prefix() is called. */ status = calculate_set_stdlib_dir(calculate, pathconfig);
pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix); if (_PyStatus_EXCEPTION(status)) {
if (pathconfig->stdlib_dir == NULL) { return status;
return _PyStatus_NO_MEMORY();
}
} }
} }

View file

@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename)
} }
/* Remove navigation elements such as "." and "..".

   This is mostly a C implementation of posixpath.normpath(), restricted
   to non-empty absolute paths (both preconditions are asserted).  Runs of
   separators are collapsed to one, except that exactly two leading
   separators are kept.  ".." never climbs above the root, so "/../spam"
   becomes "/spam" and "/spam/.." becomes "/".

   The result is written to "buf", which holds "buf_len" wide characters;
   "path" itself is not modified.  The length check is conservative: it
   requires wcslen(path) + 1 to be strictly less than "buf_len".

   Return 0 on success.  Return -1 if "path" is too big for the buffer. */
int
_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
{
    assert(path && *path != L'\0');
    assert(*path == SEP);  // an absolute path
    if (wcslen(path) + 1 >= buf_len) {
        return -1;
    }

    // State of the component being scanned: 0, 1 or 2 means it consists of
    // exactly that many dots so far; -1 means it is an ordinary name.
    int dots = -1;
    // True until the leading run of separators has been dealt with.
    int check_leading = 1;
    // Position of the root separator in buf: buf itself, or buf + 1 when a
    // leading "//" is preserved.
    const wchar_t *buf_start = buf;
    wchar_t *buf_next = buf;
    // The resulting filename will never be longer than path.
    for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
        wchar_t c = *remainder;
        buf_next[0] = c;
        buf_next++;
        if (c == SEP) {
            assert(dots <= 2);
            if (dots == 2) {
                // Turn "/x/y/../z" into "/x/z".
                buf_next -= 4;  // "/../"
                assert(*buf_next == SEP);
                // We cap it off at the root, so "/../spam" becomes "/spam".
                if (buf_next == buf_start) {
                    buf_next++;
                }
                else {
                    // Move to the previous SEP in the buffer.
                    while (*(buf_next - 1) != SEP) {
                        assert(buf_next != buf_start);
                        buf_next--;
                    }
                }
            }
            else if (dots == 1) {
                // Turn "/./" into "/".
                buf_next -= 2;  // "./"
                assert(*(buf_next - 1) == SEP);
            }
            else if (dots == 0) {
                // Turn "//" into "/".
                buf_next--;
                assert(*(buf_next - 1) == SEP);
                if (check_leading) {
                    if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
                        // Leave a leading "//" alone, unless "///...".
                        buf_next++;
                        buf_start++;
                    }
                    check_leading = 0;
                }
            }
            dots = 0;
        }
        else {
            check_leading = 0;
            if (dots >= 0) {
                if (c == L'.' && dots < 2) {
                    dots++;
                }
                else {
                    dots = -1;
                }
            }
        }
    }

    if (dots >= 0) {
        // Strip any trailing dots and trailing slash.
        buf_next -= dots + 1;  // "/" or "/." or "/.."
        assert(*buf_next == SEP);
        if (dots == 2 && buf_next != buf_start) {
            // A trailing ".." also drops the preceding component:
            // move to the previous SEP in the buffer.
            do {
                assert(buf_next != buf_start);
                buf_next--;
            } while (*buf_next != SEP);
        }
        if (buf_next == buf_start) {
            // Leave the leading slash for root.  This must be checked
            // after the walk-back above as well, so that "/spam/.." ends
            // up as "/" rather than as an empty string.
            buf_next++;
        }
    }
    *buf_next = L'\0';
    return 0;
}
/* Get the current directory. buflen is the buffer size in wide characters /* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding. including the null character. Decode the path from the locale encoding.