gh-120754: Refactor I/O modules to stash whole stat result rather than individual members (#123412)

Multiple places in the I/O stack optimize common cases by using the
information from stat. Currently individual members are extracted from
the stat and stored into the fileio struct. Refactor the code to store
the whole stat struct instead.

Parallels the changes to _io. The `stat` Python object doesn't allow
changing members, so rather than modifying estimated_size, just clear
the value.
This commit is contained in:
Cody Maloney 2024-09-18 08:47:57 -07:00 committed by GitHub
parent 96f619faa7
commit 8b6c7c7877
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 81 additions and 46 deletions

View file

@ -242,14 +242,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
buffering = -1
line_buffering = True
if buffering < 0:
buffering = DEFAULT_BUFFER_SIZE
try:
bs = os.fstat(raw.fileno()).st_blksize
except (OSError, AttributeError):
pass
else:
if bs > 1:
buffering = bs
buffering = raw._blksize
if buffering < 0:
raise ValueError("invalid buffering size")
if buffering == 0:
@ -1565,19 +1558,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
os.set_inheritable(fd, False)
self._closefd = closefd
fdfstat = os.fstat(fd)
self._stat_atopen = os.fstat(fd)
try:
if stat.S_ISDIR(fdfstat.st_mode):
if stat.S_ISDIR(self._stat_atopen.st_mode):
raise IsADirectoryError(errno.EISDIR,
os.strerror(errno.EISDIR), file)
except AttributeError:
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
# don't exist.
pass
self._blksize = getattr(fdfstat, 'st_blksize', 0)
if self._blksize <= 1:
self._blksize = DEFAULT_BUFFER_SIZE
self._estimated_size = fdfstat.st_size
if _setmode:
# don't translate newlines (\r\n <=> \n)
@ -1623,6 +1612,17 @@ def __repr__(self):
return ('<%s name=%r mode=%r closefd=%r>' %
(class_name, name, self.mode, self._closefd))
@property
def _blksize(self):
if self._stat_atopen is None:
return DEFAULT_BUFFER_SIZE
blksize = getattr(self._stat_atopen, "st_blksize", 0)
# WASI sets blsize to 0
if not blksize:
return DEFAULT_BUFFER_SIZE
return blksize
def _checkReadable(self):
if not self._readable:
raise UnsupportedOperation('File not open for reading')
@ -1655,16 +1655,20 @@ def readall(self):
"""
self._checkClosed()
self._checkReadable()
if self._estimated_size <= 0:
if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
bufsize = self._estimated_size + 1
# In order to detect end of file, need a read() of at least 1
# byte which returns size 0. Oversize the buffer by 1 byte so the
# I/O can be completed with two read() calls (one for all data, one
# for EOF) without needing to resize the buffer.
bufsize = self._stat_atopen.st_size + 1
if self._estimated_size > 65536:
if self._stat_atopen.st_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._estimated_size >= pos:
bufsize = self._estimated_size - pos + 1
if self._stat_atopen.st_size >= pos:
bufsize = self._stat_atopen.st_size - pos + 1
except OSError:
pass
@ -1742,7 +1746,7 @@ def truncate(self, size=None):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
self._estimated_size = size
self._stat_atopen = None
return size
def close(self):