mirror of
https://github.com/python/cpython.git
synced 2025-11-01 06:01:29 +00:00
gh-129005: Align FileIO.readall between _pyio and _io (#129705)
Utilize `bytearray.resize()` and `os.readinto()` to reduce copies and match the behavior of `_io.FileIO.readall()`. There is still one extra copy, which means twice the memory is required compared to FileIO, because there is currently no zero-copy path from `bytearray` to `bytes`. On my system, reading a 2 GB file with `./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v` goes from ~2.7 seconds to ~2.2 seconds. Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
ae132edc29
commit
a3d5aab9a8
2 changed files with 25 additions and 14 deletions
37
Lib/_pyio.py
37
Lib/_pyio.py
|
|
@ -1454,6 +1454,17 @@ def write(self, b):
|
|||
return BufferedWriter.write(self, b)
|
||||
|
||||
|
||||
def _new_buffersize(bytes_read):
    """Return the buffer size to grow to once *bytes_read* bytes are held.

    Parallels _io/fileio.c new_buffersize.  The increment is one eighth of
    *bytes_read* when more than 65536 bytes have been read, otherwise
    *bytes_read* + 256; in both cases it is at least DEFAULT_BUFFER_SIZE.
    """
    if bytes_read > 65536:
        growth = bytes_read >> 3
    else:
        growth = bytes_read + 256
    # Never grow by less than one default-sized buffer.
    return bytes_read + max(growth, DEFAULT_BUFFER_SIZE)
|
||||
|
||||
|
||||
class FileIO(RawIOBase):
|
||||
_fd = -1
|
||||
_created = False
|
||||
|
|
@ -1672,22 +1683,20 @@ def readall(self):
|
|||
except OSError:
|
||||
pass
|
||||
|
||||
result = bytearray()
|
||||
while True:
|
||||
if len(result) >= bufsize:
|
||||
bufsize = len(result)
|
||||
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
|
||||
n = bufsize - len(result)
|
||||
try:
|
||||
chunk = os.read(self._fd, n)
|
||||
except BlockingIOError:
|
||||
if result:
|
||||
break
|
||||
result = bytearray(bufsize)
|
||||
bytes_read = 0
|
||||
try:
|
||||
while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
|
||||
bytes_read += n
|
||||
if bytes_read >= len(result):
|
||||
result.resize(_new_buffersize(bytes_read))
|
||||
except BlockingIOError:
|
||||
if not bytes_read:
|
||||
return None
|
||||
if not chunk: # reached the end of the file
|
||||
break
|
||||
result += chunk
|
||||
|
||||
assert len(result) - bytes_read >= 1, \
|
||||
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
|
||||
result.resize(bytes_read)
|
||||
return bytes(result)
|
||||
|
||||
def readinto(self, buffer):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer
|
||||
using the same algorithm ``_io.FileIO.readall()`` uses.
|
||||
Loading…
Add table
Add a link
Reference in a new issue