gh-119342: Fix a potential denial of service in plistlib (GH-119343)

Reading a small, specially crafted plist file could cause an out-of-memory (OOM) condition, because the file's read(n) method preallocates a bytes object large enough to hold the requested amount of data. plistlib now reads large data in chunks, so the upper limit on memory consumption is proportional to the size of the input file.
2025-12-08 06:10:17 +00:00 · 2025-12-01 17:28:15 +02:00 · 2025-12-01 17:28:15 +02:00 · 694922cf40
commit 694922cf40
parent 5a4c4a033a
3 changed files with 59 additions and 14 deletions
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@ -73,6 +73,9 @@
 PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
 globals().update(PlistFormat.__members__)

+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20

 class UID:
    def __init__(self, data):
@ -508,12 +511,24 @@ def _get_size(self, tokenL):

        return tokenL

+    def _read(self, size):
+        cursize = min(size, _MIN_READ_BUF_SIZE)
+        data = self._fp.read(cursize)
+        while True:
+            if len(data) != cursize:
+                raise InvalidFileException
+            if cursize == size:
+                return data
+            delta = min(cursize, size - cursize)
+            data += self._fp.read(delta)
+            cursize += delta
+
    def _read_ints(self, n, size):
-        data = self._fp.read(size * n)
+        data = self._read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
        else:
-            if not size or len(data) != size * n:
+            if not size:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))
@ -573,22 +588,16 @@ def _read_object(self, ref):

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
-            result = self._fp.read(s)
-            if len(result) != s:
-                raise InvalidFileException()
+            result = self._read(s)

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
-            data = self._fp.read(s)
-            if len(data) != s:
-                raise InvalidFileException()
+            data = self._read(s)
            result = data.decode('ascii')

        elif tokenH == 0x60:  # unicode string
            s = self._get_size(tokenL) * 2
-            data = self._fp.read(s)
-            if len(data) != s:
-                raise InvalidFileException()
+            data = self._read(s)
            result = data.decode('utf-16be')

        elif tokenH == 0x80:  # UID