Prevent expandtabs() on string and unicode objects from causing a segfault when a large width is passed on 32-bit platforms. Found by Google.

It would be good for people to review this especially carefully and verify that I don't have an off-by-one error and that there is no other way to cause an overflow.
2026-02-22 15:10:47 +00:00 · 2007-06-09 03:36:34 +00:00 · 2007-06-09 03:36:34 +00:00 · 7dbd2a3720
commit 7dbd2a3720
parent ea7f88e3d9
5 changed files with 49 additions and 8 deletions
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@ -1,4 +1,6 @@
+
 import unittest
+import sys
 from test import test_support, string_tests


@ -82,6 +84,15 @@ def __unicode__(self):
        self.assertEqual(str(Foo9("foo")), "string")
        self.assertEqual(unicode(Foo9("foo")), u"not unicode")

+    def test_expandtabs_overflows_gracefully(self):
+        # This test only affects 32-bit platforms because expandtabs can only take
+        # an int as the max value, not a 64-bit C long.  If expandtabs is changed
+        # to take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint)
+
+
 def test_main():
    test_support.run_unittest(StrTest)

--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -817,8 +817,13 @@ def __repr__(self):
        self.assertEqual(repr(s1()), '\\n')
        self.assertEqual(repr(s2()), '\\n')

-
-
+    def test_expandtabs_overflows_gracefully(self):
+        # This test only affects 32-bit platforms because expandtabs can only take
+        # an int as the max value, not a 64-bit C long.  If expandtabs is changed
+        # to take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)


 def test_main():
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1?
 Core and builtins
 -----------------

+- Prevent expandtabs() on string and unicode objects from causing a segfault when
+  a large width is passed on 32-bit platforms.
+
 - Bug #1733488: Fix compilation of bufferobject.c on AIX.

 - Bug #1722485: remove docstrings again when running with -OO.
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@ -3307,7 +3307,7 @@ string_expandtabs(PyStringObject *self, PyObject *args)
 {
    const char *e, *p;
    char *q;
-    Py_ssize_t i, j;
+    Py_ssize_t i, j, old_j;
    PyObject *u;
    int tabsize = 8;

@ -3315,12 +3315,18 @@ string_expandtabs(PyStringObject *self, PyObject *args)
 	return NULL;

    /* First pass: determine size of output string */
-    i = j = 0;
+    i = j = old_j = 0;
    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
    for (p = PyString_AS_STRING(self); p < e; p++)
        if (*p == '\t') {
-	    if (tabsize > 0)
+	    if (tabsize > 0) {
 		j += tabsize - (j % tabsize);
+		if (old_j > j) {
+		    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+		    return NULL;
+		}
+		old_j = j;
+            }
 	}
        else {
            j++;
@ -3330,6 +3336,11 @@ string_expandtabs(PyStringObject *self, PyObject *args)
            }
        }

+    if ((i + j) < 0) {
+        PyErr_SetString(PyExc_OverflowError, "new string is too long");
+        return NULL;
+    }
+
    /* Second pass: create output string and fill it */
    u = PyString_FromStringAndSize(NULL, i + j);
    if (!u)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -5690,7 +5690,7 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
    Py_UNICODE *e;
    Py_UNICODE *p;
    Py_UNICODE *q;
-    Py_ssize_t i, j;
+    Py_ssize_t i, j, old_j;
    PyUnicodeObject *u;
    int tabsize = 8;

@ -5698,12 +5698,18 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
 	return NULL;

    /* First pass: determine size of output string */
-    i = j = 0;
+    i = j = old_j = 0;
    e = self->str + self->length;
    for (p = self->str; p < e; p++)
        if (*p == '\t') {
-	    if (tabsize > 0)
+	    if (tabsize > 0) {
 		j += tabsize - (j % tabsize);
+		if (old_j > j) {
+		    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+		    return NULL;
+		}
+		old_j = j;
+	    }
 	}
        else {
            j++;
@ -5713,6 +5719,11 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
            }
        }

+    if ((i + j) < 0) {
+        PyErr_SetString(PyExc_OverflowError, "new string is too long");
+        return NULL;
+    }
+
    /* Second pass: create output string and fill it */
    u = _PyUnicode_New(i + j);
    if (!u)