From e90061f5f13ff8ad43cfed0ca724bef42609fe20 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 20 Nov 2025 09:37:04 -0800 Subject: [PATCH] gh-60107: Remove a copy from RawIOBase.read (#141532) If the underlying I/O class keeps a reference to the memory, raise BufferError. Co-authored-by: Victor Stinner --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 ++++ Lib/test/test_io/test_general.py | 19 ++++++++++++++++ ...5-11-13-13-11-02.gh-issue-60107.LZq3QF.rst | 2 ++ Modules/_io/iobase.c | 22 +++++++++---------- 7 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c3968aff8f3..783747d1f01 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -2070,6 +2070,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(take_bytes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target_is_directory)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 4dd73291df4..374617d8284 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -793,6 +793,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(take_bytes) STRUCT_FOR_ID(target) STRUCT_FOR_ID(target_is_directory) STRUCT_FOR_ID(task) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 08f8d0e59d1..a66c97f7f13 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -2068,6 +2068,7 @@ extern "C" { INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ + INIT_ID(take_bytes), \ INIT_ID(target), \ INIT_ID(target_is_directory), \ INIT_ID(task), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b1e57126b92..2061b1d2049 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2952,6 +2952,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(take_bytes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(target); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_io/test_general.py b/Lib/test/test_io/test_general.py index f0677b01ea5..085ed3ea6a9 100644 --- a/Lib/test/test_io/test_general.py +++ b/Lib/test/test_io/test_general.py @@ -609,6 +609,25 @@ def readinto(self, b): with self.assertRaises(ValueError): Misbehaved(bad_size).read() + def test_RawIOBase_read_gh60107(self): + # gh-60107: Ensure a "Raw I/O" which keeps a reference to the + # mutable memory doesn't allow making a mutable bytes. + class RawIOKeepsReference(self.MockRawIOWithoutRead): + def __init__(self, *args, **kwargs): + self.buf = None + super().__init__(*args, **kwargs) + + def readinto(self, buf): + # buf is the bytearray so keeping a reference to it doesn't keep + # the memory alive; a memoryview does. + self.buf = memoryview(buf) + buf[0:4] = self._read_stack.pop() + return 3 + + with self.assertRaises(BufferError): + rawio = RawIOKeepsReference([b"1234"]) + rawio.read(4) + def test_types_have_dict(self): test = ( self.IOBase(), diff --git a/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst new file mode 100644 index 00000000000..c5103fb8005 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst @@ -0,0 +1,2 @@ +Remove a copy from :meth:`io.RawIOBase.read`. If the underlying I/O class +keeps a reference to the mutable memory, raise a :exc:`BufferError`. diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index f1c2fe17801..f036ea503b1 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -927,33 +927,33 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) return PyObject_CallMethodNoArgs(self, &_Py_ID(readall)); } - /* TODO: allocate a bytes object directly instead and manually construct - a writable memoryview pointing to it. */ b = PyByteArray_FromStringAndSize(NULL, n); - if (b == NULL) + if (b == NULL) { return NULL; + } res = PyObject_CallMethodObjArgs(self, &_Py_ID(readinto), b, NULL); if (res == NULL || res == Py_None) { - Py_DECREF(b); - return res; + goto cleanup; } Py_ssize_t bytes_filled = PyNumber_AsSsize_t(res, PyExc_ValueError); - Py_DECREF(res); + Py_CLEAR(res); if (bytes_filled == -1 && PyErr_Occurred()) { - Py_DECREF(b); - return NULL; + goto cleanup; } if (bytes_filled < 0 || bytes_filled > n) { - Py_DECREF(b); PyErr_Format(PyExc_ValueError, "readinto returned %zd outside buffer size %zd", bytes_filled, n); - return NULL; + goto cleanup; } + if (PyByteArray_Resize(b, bytes_filled) < 0) { + goto cleanup; + } + res = PyObject_CallMethodNoArgs(b, &_Py_ID(take_bytes)); - res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), bytes_filled); +cleanup: Py_DECREF(b); return res; }