gh-119451: Fix a potential denial of service in http.client (GH-119454)

Reading the whole body of the HTTP response could cause an out-of-memory
condition if the Content-Length value is very large, even if the server does
not actually send a large amount of data. The HTTP client now reads large
bodies in chunks, so the amount of memory consumed is proportional to the
amount of data actually sent.
This commit is contained in:
Serhiy Storchaka 2025-12-01 17:26:07 +02:00 committed by GitHub
parent d4fa70706c
commit 5a4c4a033a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 95 additions and 4 deletions

View file

@ -111,6 +111,11 @@
_MAXLINE = 65536
_MAXHEADERS = 100
# Size in bytes (1 << 20 == 1048576, i.e. 1 MiB) of the largest initial read
# issued by _safe_read(). Requests for more data than this are read in
# chunks, starting at this size, to prevent extreme overallocation when the
# requested amount is much larger than the data actually available.
_MIN_READ_BUF_SIZE = 1 << 20
# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR = %x21-7E
@ -642,10 +647,25 @@ def _safe_read(self, amt):
reading. If the bytes are truly not available (due to EOF), then the
IncompleteRead exception can be used to detect the problem.
"""
data = self.fp.read(amt)
if len(data) < amt:
raise IncompleteRead(data, amt-len(data))
return data
cursize = min(amt, _MIN_READ_BUF_SIZE)
data = self.fp.read(cursize)
if len(data) >= amt:
return data
if len(data) < cursize:
raise IncompleteRead(data, amt - len(data))
data = io.BytesIO(data)
data.seek(0, 2)
while True:
# Grow the read size geometrically: each iteration requests at most as
# many bytes as have already been read, so the buffer never more than doubles.
delta = min(cursize, amt - cursize)
data.write(self.fp.read(delta))
if data.tell() >= amt:
return data.getvalue()
cursize += delta
if data.tell() < cursize:
raise IncompleteRead(data.getvalue(), amt - data.tell())
def _safe_readinto(self, b):
"""Same as _safe_read, but for reading into a buffer."""