gh-129005: Align FileIO.readall between _pyio and _io (#129705)

Utilize `bytearray.resize()` and `os.readinto()` to reduce copies
and match behavior of `_io.FileIO.readall()`.

There is still an extra copy, which means twice the memory is required
compared to `_io.FileIO`, because there is currently no zero-copy path
from `bytearray` -> `bytes`.

On my system reading a 2 GB file:
`./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v`

The run time goes from ~2.7 seconds to ~2.2 seconds.

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Cody Maloney 2025-02-07 03:06:11 -08:00 committed by GitHub
parent ae132edc29
commit a3d5aab9a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 25 additions and 14 deletions

View file

@ -1454,6 +1454,17 @@ def write(self, b):
return BufferedWriter.write(self, b) return BufferedWriter.write(self, b)
def _new_buffersize(bytes_read):
    """Return the buffer size to grow to once *bytes_read* bytes are filled.

    Parallels new_buffersize in _io/fileio.c.  The increment is one
    eighth of *bytes_read* when more than 65536 bytes have been read,
    otherwise 256 + *bytes_read*; in both cases it is raised to at least
    DEFAULT_BUFFER_SIZE.
    """
    growth = bytes_read >> 3 if bytes_read > 65536 else 256 + bytes_read
    # Never grow by less than one default-sized buffer.
    return bytes_read + max(growth, DEFAULT_BUFFER_SIZE)
class FileIO(RawIOBase): class FileIO(RawIOBase):
_fd = -1 _fd = -1
_created = False _created = False
@ -1672,22 +1683,20 @@ def readall(self):
except OSError: except OSError:
pass pass
result = bytearray() result = bytearray(bufsize)
while True: bytes_read = 0
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
n = bufsize - len(result)
try: try:
chunk = os.read(self._fd, n) while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
bytes_read += n
if bytes_read >= len(result):
result.resize(_new_buffersize(bytes_read))
except BlockingIOError: except BlockingIOError:
if result: if not bytes_read:
break
return None return None
if not chunk: # reached the end of the file
break
result += chunk
assert len(result) - bytes_read >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
result.resize(bytes_read)
return bytes(result) return bytes(result)
def readinto(self, buffer): def readinto(self, buffer):

View file

@ -0,0 +1,2 @@
``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer
using the same algorithm ``_io.FileIO.readall()`` uses.