gh-143008: Fix Null pointer dereferences in TextIOWrapper underlying stream access (#145957)

TextIOWrapper keeps its underlying stream in a member called
`self->buffer`. That stream can be detached by user code, such as custom
`.flush` implementations resulting in `self->buffer` being set to NULL.
The implementation often checked at the start of functions if
`self->buffer` is in a good state, but did not always recheck after
other Python code was called which could modify `self->buffer`.

The cases which need to be re-checked are hard to spot so rather than
rely on reviewer effort create better safety by making all self->buffer
access go through helper functions.

Thank you yihong0618 for the test, NEWS and initial implementation in
gh-143041.

Co-authored-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Cody Maloney 2026-06-09 03:31:44 -07:00 committed by GitHub
parent 3186547c1e
commit db4b1948bc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 176 additions and 38 deletions

View file

@ -1560,6 +1560,56 @@ def closed(self):
wrapper = self.TextIOWrapper(raw)
wrapper.close() # should not crash
def test_reentrant_detach_during_flush(self):
# gh-143008: Reentrant detach() during flush should not crash.
class DetachOnce(self.BufferedRandom):
wrapper = None
def detach_once(self):
original = self.wrapper
self.wrapper = None
if original is not None:
original.detach()
original.flush()
class DetachOnFlush(DetachOnce):
def flush(self):
self.detach_once()
class DetachOnWrite(DetachOnce):
def write(self, b):
self.detach_once()
return len(b)
# Separate reference for after detach_once.
wrapper = None
def make_text(buffer):
nonlocal wrapper
buffer.wrapper = self.TextIOWrapper(buffer, encoding='utf-8')
wrapper = buffer.wrapper
# Many calls could result in the same null self->buffer crash.
tests = [
('truncate', lambda: wrapper.truncate(0)),
('close', lambda: wrapper.close()),
('detach', lambda: wrapper.detach()),
('seek', lambda: wrapper.seek(0)),
('tell', lambda: wrapper.tell()),
('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)),
]
for name, method in tests:
with self.subTest(name):
make_text(DetachOnFlush(self.MockRawIO()))
self.assertRaisesRegex(ValueError, "detached", method)
# Should not crash.
with self.subTest('read via writeflush'):
make_text(DetachOnWrite(self.MockRawIO()))
wrapper.write('x')
self.assertRaisesRegex(ValueError, "detached", wrapper.read)
class PyTextIOWrapperTest(TextIOWrapperTest, PyTestCase):
shutdown_error = "LookupError: unknown encoding: ascii"

View file

@ -0,0 +1,2 @@
Fix crash in :class:`io.TextIOWrapper` when reentrant
:meth:`io.TextIOBase.detach` is called reentrantly from the underlying buffer.

View file

@ -0,0 +1 @@
Fix race conditions when re-initializing a :class:`io.TextIOWrapper` object.

View file

@ -666,7 +666,9 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs)
goto exit;
}
skip_optional_pos:
Py_BEGIN_CRITICAL_SECTION(self);
return_value = _io_TextIOWrapper___init___impl((textio *)self, buffer, encoding, errors, newline, line_buffering, write_through);
Py_END_CRITICAL_SECTION();
exit:
return return_value;
@ -1329,4 +1331,4 @@ _io_TextIOWrapper__CHUNK_SIZE_set(PyObject *self, PyObject *value, void *Py_UNUS
return return_value;
}
/*[clinic end generated code: output=f900b42090c9781c input=a9049054013a1b77]*/
/*[clinic end generated code: output=8c571c9dba87d2b1 input=a9049054013a1b77]*/

View file

@ -672,6 +672,8 @@ struct textio
int ok; /* initialized? */
int detached;
Py_ssize_t chunk_size;
/* Use helpers buffer_*() functions to access buffer; many operations can set it to
NULL (see gh-143008, gh-142594). */
PyObject *buffer;
PyObject *encoding;
PyObject *encoder;
@ -729,6 +731,67 @@ struct textio
#define textio_CAST(op) ((textio *)(op))
/* Helpers to safely operate on self->buffer.
self->buffer can be detached (set to NULL) by any user code that is called
leading to NULL pointer dereferences (see gh-143008, gh-142594). Protect
against that by using helpers to check self->buffer validity at callsites. */
static PyObject *
buffer_access_safe(textio *self)
{
/* Check self->buffer directly but match errors of CHECK_ATTACHED since this
is called during construction and finalization where self->ok == 0. */
if (self->buffer == NULL) {
if (self->ok <= 0) {
PyErr_SetString(PyExc_ValueError,
"I/O operation on uninitialized object");
}
else {
PyErr_SetString(PyExc_ValueError,
"underlying buffer has been detached");
}
return NULL;
}
/* Returning a borrowed reference is safe since TextIOWrapper methods are
protected by critical sections. */
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
return self->buffer;
}
static PyObject *
buffer_getattr(textio *self, PyObject *attr_name)
{
PyObject *buffer = buffer_access_safe(self);
if (buffer == NULL) {
return NULL;
}
return PyObject_GetAttr(buffer, attr_name);
}
static PyObject *
buffer_callmethod_noargs(textio *self, PyObject *name)
{
PyObject *buffer = buffer_access_safe(self);
if (buffer == NULL) {
return NULL;
}
return PyObject_CallMethodNoArgs(buffer, name);
}
static PyObject *
buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg)
{
PyObject *buffer = buffer_access_safe(self);
if (buffer == NULL) {
return NULL;
}
return PyObject_CallMethodOneArg(buffer, name, arg);
}
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
@ -898,7 +961,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
PyObject *res;
int r;
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
res = buffer_callmethod_noargs(self, &_Py_ID(readable));
if (res == NULL)
return -1;
@ -954,7 +1017,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
PyObject *res;
int r;
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
res = buffer_callmethod_noargs(self, &_Py_ID(writable));
if (res == NULL)
return -1;
@ -1000,8 +1063,7 @@ _textiowrapper_fix_encoder_state(textio *self)
self->encoding_start_of_stream = 1;
PyObject *cookieObj = PyObject_CallMethodNoArgs(
self->buffer, &_Py_ID(tell));
PyObject *cookieObj = buffer_callmethod_noargs(self, &_Py_ID(tell));
if (cookieObj == NULL) {
return -1;
}
@ -1061,6 +1123,7 @@ io_check_errors(PyObject *errors)
/*[clinic input]
@critical_section
_io.TextIOWrapper.__init__
buffer: object
encoding: str(accept={str, NoneType}) = None
@ -1104,7 +1167,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
const char *encoding, PyObject *errors,
const char *newline, int line_buffering,
int write_through)
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
/*[clinic end generated code: output=72267c0c01032ed2 input=0f077220214c40a4]*/
{
PyObject *raw, *codec_info = NULL;
PyObject *res;
@ -1568,11 +1631,14 @@ _io_TextIOWrapper_detach_impl(textio *self)
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
{
PyObject *buffer;
CHECK_ATTACHED(self);
if (_PyFile_Flush((PyObject *)self) < 0) {
return NULL;
}
buffer = self->buffer;
/* _PyFile_Flush could detach before returning; raise an exception. */
buffer = buffer_access_safe(self);
if (buffer == NULL) {
return NULL;
}
self->buffer = NULL;
self->detached = 1;
return buffer;
@ -1641,7 +1707,7 @@ _textiowrapper_writeflush(textio *self)
PyObject *ret;
do {
ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
ret = buffer_callmethod_onearg(self, &_Py_ID(write), b);
} while (ret == NULL && _PyIO_trap_eintr());
Py_DECREF(b);
// NOTE: We cleared buffer but we don't know how many bytes are actually written
@ -1787,7 +1853,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
}
if (needflush) {
if (_PyFile_Flush(self->buffer) < 0) {
PyObject *buffer = buffer_access_safe(self);
if (buffer == NULL || _PyFile_Flush(buffer) < 0) {
return NULL;
}
}
@ -1917,9 +1984,10 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
if (chunk_size == NULL)
goto fail;
input_chunk = PyObject_CallMethodOneArg(self->buffer,
(self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
chunk_size);
input_chunk = buffer_callmethod_onearg(self,
(self->has_read1 ? &_Py_ID(read1) :
&_Py_ID(read)),
chunk_size);
Py_DECREF(chunk_size);
if (input_chunk == NULL)
goto fail;
@ -2003,7 +2071,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
if (n < 0) {
/* Read everything */
PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
PyObject *bytes = buffer_callmethod_noargs(self, &_Py_ID(read));
PyObject *decoded;
if (bytes == NULL)
goto fail;
@ -2600,7 +2668,11 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
Py_DECREF(res);
}
res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
PyObject *buf = buffer_access_safe(self);
if (buf == NULL) {
goto fail;
}
res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2);
Py_CLEAR(cookieObj);
if (res == NULL)
goto fail;
@ -2648,7 +2720,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
posobj = PyLong_FromOff_t(cookie.start_pos);
if (posobj == NULL)
goto fail;
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
res = buffer_callmethod_onearg(self, &_Py_ID(seek), posobj);
Py_DECREF(posobj);
if (res == NULL)
goto fail;
@ -2665,8 +2737,15 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
if (cookie.chars_to_skip) {
/* Just like _read_chunk, feed the decoder and save a snapshot. */
PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
"i", cookie.bytes_to_feed);
PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed);
if (bytes_to_feed == NULL) {
goto fail;
}
PyObject *input_chunk = buffer_callmethod_onearg(self,
&_Py_ID(read),
bytes_to_feed);
Py_DECREF(bytes_to_feed);
PyObject *decoded;
if (input_chunk == NULL)
@ -2765,7 +2844,7 @@ _io_TextIOWrapper_tell_impl(textio *self)
goto fail;
}
posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
posobj = buffer_callmethod_noargs(self, &_Py_ID(tell));
if (posobj == NULL)
goto fail;
@ -2975,7 +3054,7 @@ _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
return NULL;
}
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
return buffer_callmethod_onearg(self, &_Py_ID(truncate), pos);
}
static PyObject *
@ -3057,8 +3136,7 @@ static PyObject *
_io_TextIOWrapper_fileno_impl(textio *self)
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
{
CHECK_ATTACHED(self);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
return buffer_callmethod_noargs(self, &_Py_ID(fileno));
}
/*[clinic input]
@ -3070,8 +3148,7 @@ static PyObject *
_io_TextIOWrapper_seekable_impl(textio *self)
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
{
CHECK_ATTACHED(self);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
return buffer_callmethod_noargs(self, &_Py_ID(seekable));
}
/*[clinic input]
@ -3083,8 +3160,7 @@ static PyObject *
_io_TextIOWrapper_readable_impl(textio *self)
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
{
CHECK_ATTACHED(self);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
return buffer_callmethod_noargs(self, &_Py_ID(readable));
}
/*[clinic input]
@ -3096,8 +3172,7 @@ static PyObject *
_io_TextIOWrapper_writable_impl(textio *self)
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
{
CHECK_ATTACHED(self);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
return buffer_callmethod_noargs(self, &_Py_ID(writable));
}
/*[clinic input]
@ -3109,8 +3184,7 @@ static PyObject *
_io_TextIOWrapper_isatty_impl(textio *self)
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
{
CHECK_ATTACHED(self);
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
return buffer_callmethod_noargs(self, &_Py_ID(isatty));
}
/*[clinic input]
@ -3127,7 +3201,7 @@ _io_TextIOWrapper_flush_impl(textio *self)
self->telling = self->seekable;
if (_textiowrapper_writeflush(self) < 0)
return NULL;
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
return buffer_callmethod_noargs(self, &_Py_ID(flush));
}
/*[clinic input]
@ -3160,8 +3234,9 @@ _io_TextIOWrapper_close_impl(textio *self)
else {
PyObject *exc = NULL;
if (self->finalizing) {
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
(PyObject *)self);
res = buffer_callmethod_onearg(self,
&_Py_ID(_dealloc_warn),
(PyObject *)self);
if (res) {
Py_DECREF(res);
}
@ -3173,7 +3248,7 @@ _io_TextIOWrapper_close_impl(textio *self)
exc = PyErr_GetRaisedException();
}
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
res = buffer_callmethod_noargs(self, &_Py_ID(close));
if (exc != NULL) {
_PyErr_ChainExceptions1(exc);
Py_CLEAR(res);
@ -3241,8 +3316,7 @@ static PyObject *
_io_TextIOWrapper_name_get_impl(textio *self)
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
{
CHECK_ATTACHED(self);
return PyObject_GetAttr(self->buffer, &_Py_ID(name));
return buffer_getattr(self, &_Py_ID(name));
}
/*[clinic input]
@ -3255,8 +3329,17 @@ static PyObject *
_io_TextIOWrapper_closed_get_impl(textio *self)
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
{
CHECK_ATTACHED(self);
return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
/* If partially constructed or deconstructed, return that the underlying
buffer is closed.
The code managing the transition is responsible for closing. The closed
attribute is often called in re-initalization, as part of repr in error
cases, and when the I/O stack is garbage collected. */
if (self->ok <= 0) {
Py_RETURN_TRUE;
}
return buffer_getattr(self, &_Py_ID(closed));
}
/*[clinic input]