gh-129005: Align FileIO.readall between _pyio and _io (#129705)

Utilize `bytearray.resize()` and `os.readinto()` to reduce copies and match behavior of `_io.FileIO.readall()`. There is still an extra copy which means twice the memory required compared to FileIO because there isn't a zero-copy path from `bytearray` -> `bytes` currently. On my system reading a 2 GB file: `./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v` Goes from ~2.7 seconds -> ~2.2 seconds Co-authored-by: Victor Stinner <vstinner@python.org>
2025-12-08 06:10:17 +00:00 · 2025-02-07 03:06:11 -08:00 · 2025-02-07 03:06:11 -08:00 · a3d5aab9a8
commit a3d5aab9a8
parent ae132edc29
2 changed files with 25 additions and 14 deletions
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@ -1454,6 +1454,17 @@ def write(self, b):
        return BufferedWriter.write(self, b)


+def _new_buffersize(bytes_read):
+    # Parallels _io/fileio.c new_buffersize
+    if bytes_read > 65536:
+        addend = bytes_read >> 3
+    else:
+        addend = 256 + bytes_read
+    if addend < DEFAULT_BUFFER_SIZE:
+        addend = DEFAULT_BUFFER_SIZE
+    return bytes_read + addend
+
+
 class FileIO(RawIOBase):
    _fd = -1
    _created = False
@ -1672,22 +1683,20 @@ def readall(self):
                except OSError:
                    pass

-        result = bytearray()
-        while True:
-            if len(result) >= bufsize:
-                bufsize = len(result)
-                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
-            n = bufsize - len(result)
-            try:
-                chunk = os.read(self._fd, n)
-            except BlockingIOError:
-                if result:
-                    break
+        result = bytearray(bufsize)
+        bytes_read = 0
+        try:
+            while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
+                bytes_read += n
+                if bytes_read >= len(result):
+                    result.resize(_new_buffersize(bytes_read))
+        except BlockingIOError:
+            if not bytes_read:
                return None
-            if not chunk: # reached the end of the file
-                break
-            result += chunk

+        assert len(result) - bytes_read >= 1, \
+            "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
+        result.resize(bytes_read)
        return bytes(result)

    def readinto(self, buffer):