mirror of
https://github.com/python/cpython.git
synced 2026-06-10 11:52:01 +00:00
gh-143008: Fix Null pointer dereferences in TextIOWrapper underlying stream access (#145957)
TextIOWrapper keeps its underlying stream in a member called `self->buffer`. That stream can be detached by user code, such as custom `.flush` implementations resulting in `self->buffer` being set to NULL. The implementation often checked at the start of functions if `self->buffer` is in a good state, but did not always recheck after other Python code was called which could modify `self->buffer`. The cases which need to be re-checked are hard to spot so rather than rely on reviewer effort create better safety by making all self->buffer access go through helper functions. Thank you yihong0618 for the test, NEWS and initial implementation in gh-143041. Co-authored-by: yihong0618 <zouzou0208@gmail.com> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
3186547c1e
commit
db4b1948bc
5 changed files with 176 additions and 38 deletions
|
|
@ -1560,6 +1560,56 @@ def closed(self):
|
|||
wrapper = self.TextIOWrapper(raw)
|
||||
wrapper.close() # should not crash
|
||||
|
||||
def test_reentrant_detach_during_flush(self):
|
||||
# gh-143008: Reentrant detach() during flush should not crash.
|
||||
|
||||
class DetachOnce(self.BufferedRandom):
|
||||
wrapper = None
|
||||
|
||||
def detach_once(self):
|
||||
original = self.wrapper
|
||||
self.wrapper = None
|
||||
if original is not None:
|
||||
original.detach()
|
||||
original.flush()
|
||||
|
||||
class DetachOnFlush(DetachOnce):
|
||||
def flush(self):
|
||||
self.detach_once()
|
||||
|
||||
class DetachOnWrite(DetachOnce):
|
||||
def write(self, b):
|
||||
self.detach_once()
|
||||
return len(b)
|
||||
|
||||
# Separate reference for after detach_once.
|
||||
wrapper = None
|
||||
|
||||
def make_text(buffer):
|
||||
nonlocal wrapper
|
||||
buffer.wrapper = self.TextIOWrapper(buffer, encoding='utf-8')
|
||||
wrapper = buffer.wrapper
|
||||
|
||||
# Many calls could result in the same null self->buffer crash.
|
||||
tests = [
|
||||
('truncate', lambda: wrapper.truncate(0)),
|
||||
('close', lambda: wrapper.close()),
|
||||
('detach', lambda: wrapper.detach()),
|
||||
('seek', lambda: wrapper.seek(0)),
|
||||
('tell', lambda: wrapper.tell()),
|
||||
('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)),
|
||||
]
|
||||
for name, method in tests:
|
||||
with self.subTest(name):
|
||||
make_text(DetachOnFlush(self.MockRawIO()))
|
||||
self.assertRaisesRegex(ValueError, "detached", method)
|
||||
|
||||
# Should not crash.
|
||||
with self.subTest('read via writeflush'):
|
||||
make_text(DetachOnWrite(self.MockRawIO()))
|
||||
wrapper.write('x')
|
||||
self.assertRaisesRegex(ValueError, "detached", wrapper.read)
|
||||
|
||||
|
||||
class PyTextIOWrapperTest(TextIOWrapperTest, PyTestCase):
|
||||
shutdown_error = "LookupError: unknown encoding: ascii"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
Fix crash in :class:`io.TextIOWrapper` when reentrant
|
||||
:meth:`io.TextIOBase.detach` is called reentrantly from the underlying buffer.
|
||||
|
|
@ -0,0 +1 @@
|
|||
Fix race conditions when re-initializing a :class:`io.TextIOWrapper` object.
|
||||
4
Modules/_io/clinic/textio.c.h
generated
4
Modules/_io/clinic/textio.c.h
generated
|
|
@ -666,7 +666,9 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs)
|
|||
goto exit;
|
||||
}
|
||||
skip_optional_pos:
|
||||
Py_BEGIN_CRITICAL_SECTION(self);
|
||||
return_value = _io_TextIOWrapper___init___impl((textio *)self, buffer, encoding, errors, newline, line_buffering, write_through);
|
||||
Py_END_CRITICAL_SECTION();
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
|
|
@ -1329,4 +1331,4 @@ _io_TextIOWrapper__CHUNK_SIZE_set(PyObject *self, PyObject *value, void *Py_UNUS
|
|||
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=f900b42090c9781c input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=8c571c9dba87d2b1 input=a9049054013a1b77]*/
|
||||
|
|
|
|||
|
|
@ -672,6 +672,8 @@ struct textio
|
|||
int ok; /* initialized? */
|
||||
int detached;
|
||||
Py_ssize_t chunk_size;
|
||||
/* Use helpers buffer_*() functions to access buffer; many operations can set it to
|
||||
NULL (see gh-143008, gh-142594). */
|
||||
PyObject *buffer;
|
||||
PyObject *encoding;
|
||||
PyObject *encoder;
|
||||
|
|
@ -729,6 +731,67 @@ struct textio
|
|||
|
||||
#define textio_CAST(op) ((textio *)(op))
|
||||
|
||||
/* Helpers to safely operate on self->buffer.
|
||||
|
||||
self->buffer can be detached (set to NULL) by any user code that is called
|
||||
leading to NULL pointer dereferences (see gh-143008, gh-142594). Protect
|
||||
against that by using helpers to check self->buffer validity at callsites. */
|
||||
static PyObject *
|
||||
buffer_access_safe(textio *self)
|
||||
{
|
||||
/* Check self->buffer directly but match errors of CHECK_ATTACHED since this
|
||||
is called during construction and finalization where self->ok == 0. */
|
||||
if (self->buffer == NULL) {
|
||||
if (self->ok <= 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"I/O operation on uninitialized object");
|
||||
}
|
||||
else {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"underlying buffer has been detached");
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Returning a borrowed reference is safe since TextIOWrapper methods are
|
||||
protected by critical sections. */
|
||||
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
|
||||
return self->buffer;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
buffer_getattr(textio *self, PyObject *attr_name)
|
||||
{
|
||||
PyObject *buffer = buffer_access_safe(self);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyObject_GetAttr(buffer, attr_name);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
buffer_callmethod_noargs(textio *self, PyObject *name)
|
||||
{
|
||||
PyObject *buffer = buffer_access_safe(self);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyObject_CallMethodNoArgs(buffer, name);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg)
|
||||
{
|
||||
PyObject *buffer = buffer_access_safe(self);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyObject_CallMethodOneArg(buffer, name, arg);
|
||||
}
|
||||
|
||||
static void
|
||||
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
|
||||
|
||||
|
|
@ -898,7 +961,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
|
|||
PyObject *res;
|
||||
int r;
|
||||
|
||||
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
|
||||
res = buffer_callmethod_noargs(self, &_Py_ID(readable));
|
||||
if (res == NULL)
|
||||
return -1;
|
||||
|
||||
|
|
@ -954,7 +1017,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
|
|||
PyObject *res;
|
||||
int r;
|
||||
|
||||
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
|
||||
res = buffer_callmethod_noargs(self, &_Py_ID(writable));
|
||||
if (res == NULL)
|
||||
return -1;
|
||||
|
||||
|
|
@ -1000,8 +1063,7 @@ _textiowrapper_fix_encoder_state(textio *self)
|
|||
|
||||
self->encoding_start_of_stream = 1;
|
||||
|
||||
PyObject *cookieObj = PyObject_CallMethodNoArgs(
|
||||
self->buffer, &_Py_ID(tell));
|
||||
PyObject *cookieObj = buffer_callmethod_noargs(self, &_Py_ID(tell));
|
||||
if (cookieObj == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -1061,6 +1123,7 @@ io_check_errors(PyObject *errors)
|
|||
|
||||
|
||||
/*[clinic input]
|
||||
@critical_section
|
||||
_io.TextIOWrapper.__init__
|
||||
buffer: object
|
||||
encoding: str(accept={str, NoneType}) = None
|
||||
|
|
@ -1104,7 +1167,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
const char *encoding, PyObject *errors,
|
||||
const char *newline, int line_buffering,
|
||||
int write_through)
|
||||
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
|
||||
/*[clinic end generated code: output=72267c0c01032ed2 input=0f077220214c40a4]*/
|
||||
{
|
||||
PyObject *raw, *codec_info = NULL;
|
||||
PyObject *res;
|
||||
|
|
@ -1568,11 +1631,14 @@ _io_TextIOWrapper_detach_impl(textio *self)
|
|||
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
|
||||
{
|
||||
PyObject *buffer;
|
||||
CHECK_ATTACHED(self);
|
||||
if (_PyFile_Flush((PyObject *)self) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
buffer = self->buffer;
|
||||
/* _PyFile_Flush could detach before returning; raise an exception. */
|
||||
buffer = buffer_access_safe(self);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
self->buffer = NULL;
|
||||
self->detached = 1;
|
||||
return buffer;
|
||||
|
|
@ -1641,7 +1707,7 @@ _textiowrapper_writeflush(textio *self)
|
|||
|
||||
PyObject *ret;
|
||||
do {
|
||||
ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
|
||||
ret = buffer_callmethod_onearg(self, &_Py_ID(write), b);
|
||||
} while (ret == NULL && _PyIO_trap_eintr());
|
||||
Py_DECREF(b);
|
||||
// NOTE: We cleared buffer but we don't know how many bytes are actually written
|
||||
|
|
@ -1787,7 +1853,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
|
|||
}
|
||||
|
||||
if (needflush) {
|
||||
if (_PyFile_Flush(self->buffer) < 0) {
|
||||
PyObject *buffer = buffer_access_safe(self);
|
||||
if (buffer == NULL || _PyFile_Flush(buffer) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1917,9 +1984,10 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
|
|||
if (chunk_size == NULL)
|
||||
goto fail;
|
||||
|
||||
input_chunk = PyObject_CallMethodOneArg(self->buffer,
|
||||
(self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
|
||||
chunk_size);
|
||||
input_chunk = buffer_callmethod_onearg(self,
|
||||
(self->has_read1 ? &_Py_ID(read1) :
|
||||
&_Py_ID(read)),
|
||||
chunk_size);
|
||||
Py_DECREF(chunk_size);
|
||||
if (input_chunk == NULL)
|
||||
goto fail;
|
||||
|
|
@ -2003,7 +2071,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
|
|||
|
||||
if (n < 0) {
|
||||
/* Read everything */
|
||||
PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
|
||||
PyObject *bytes = buffer_callmethod_noargs(self, &_Py_ID(read));
|
||||
PyObject *decoded;
|
||||
if (bytes == NULL)
|
||||
goto fail;
|
||||
|
|
@ -2600,7 +2668,11 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
|
|||
Py_DECREF(res);
|
||||
}
|
||||
|
||||
res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
|
||||
PyObject *buf = buffer_access_safe(self);
|
||||
if (buf == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2);
|
||||
Py_CLEAR(cookieObj);
|
||||
if (res == NULL)
|
||||
goto fail;
|
||||
|
|
@ -2648,7 +2720,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
|
|||
posobj = PyLong_FromOff_t(cookie.start_pos);
|
||||
if (posobj == NULL)
|
||||
goto fail;
|
||||
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
|
||||
res = buffer_callmethod_onearg(self, &_Py_ID(seek), posobj);
|
||||
Py_DECREF(posobj);
|
||||
if (res == NULL)
|
||||
goto fail;
|
||||
|
|
@ -2665,8 +2737,15 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
|
|||
|
||||
if (cookie.chars_to_skip) {
|
||||
/* Just like _read_chunk, feed the decoder and save a snapshot. */
|
||||
PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
|
||||
"i", cookie.bytes_to_feed);
|
||||
PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed);
|
||||
if (bytes_to_feed == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
PyObject *input_chunk = buffer_callmethod_onearg(self,
|
||||
&_Py_ID(read),
|
||||
bytes_to_feed);
|
||||
Py_DECREF(bytes_to_feed);
|
||||
|
||||
PyObject *decoded;
|
||||
|
||||
if (input_chunk == NULL)
|
||||
|
|
@ -2765,7 +2844,7 @@ _io_TextIOWrapper_tell_impl(textio *self)
|
|||
goto fail;
|
||||
}
|
||||
|
||||
posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
|
||||
posobj = buffer_callmethod_noargs(self, &_Py_ID(tell));
|
||||
if (posobj == NULL)
|
||||
goto fail;
|
||||
|
||||
|
|
@ -2975,7 +3054,7 @@ _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
|
||||
return buffer_callmethod_onearg(self, &_Py_ID(truncate), pos);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
@ -3057,8 +3136,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_fileno_impl(textio *self)
|
||||
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(fileno));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3070,8 +3148,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_seekable_impl(textio *self)
|
||||
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(seekable));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3083,8 +3160,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_readable_impl(textio *self)
|
||||
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(readable));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3096,8 +3172,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_writable_impl(textio *self)
|
||||
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(writable));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3109,8 +3184,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_isatty_impl(textio *self)
|
||||
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(isatty));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3127,7 +3201,7 @@ _io_TextIOWrapper_flush_impl(textio *self)
|
|||
self->telling = self->seekable;
|
||||
if (_textiowrapper_writeflush(self) < 0)
|
||||
return NULL;
|
||||
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
|
||||
return buffer_callmethod_noargs(self, &_Py_ID(flush));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3160,8 +3234,9 @@ _io_TextIOWrapper_close_impl(textio *self)
|
|||
else {
|
||||
PyObject *exc = NULL;
|
||||
if (self->finalizing) {
|
||||
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
|
||||
(PyObject *)self);
|
||||
res = buffer_callmethod_onearg(self,
|
||||
&_Py_ID(_dealloc_warn),
|
||||
(PyObject *)self);
|
||||
if (res) {
|
||||
Py_DECREF(res);
|
||||
}
|
||||
|
|
@ -3173,7 +3248,7 @@ _io_TextIOWrapper_close_impl(textio *self)
|
|||
exc = PyErr_GetRaisedException();
|
||||
}
|
||||
|
||||
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
|
||||
res = buffer_callmethod_noargs(self, &_Py_ID(close));
|
||||
if (exc != NULL) {
|
||||
_PyErr_ChainExceptions1(exc);
|
||||
Py_CLEAR(res);
|
||||
|
|
@ -3241,8 +3316,7 @@ static PyObject *
|
|||
_io_TextIOWrapper_name_get_impl(textio *self)
|
||||
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_GetAttr(self->buffer, &_Py_ID(name));
|
||||
return buffer_getattr(self, &_Py_ID(name));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
@ -3255,8 +3329,17 @@ static PyObject *
|
|||
_io_TextIOWrapper_closed_get_impl(textio *self)
|
||||
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
|
||||
{
|
||||
CHECK_ATTACHED(self);
|
||||
return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
|
||||
/* If partially constructed or deconstructed, return that the underlying
|
||||
buffer is closed.
|
||||
|
||||
The code managing the transition is responsible for closing. The closed
|
||||
attribute is often called in re-initalization, as part of repr in error
|
||||
cases, and when the I/O stack is garbage collected. */
|
||||
if (self->ok <= 0) {
|
||||
Py_RETURN_TRUE;
|
||||
}
|
||||
|
||||
return buffer_getattr(self, &_Py_ID(closed));
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue