gh-119451: Fix a potential denial of service in http.client (GH-119454)

Reading the whole body of the HTTP response could cause OOM if
the Content-Length value is too large even if the server does not send
a large amount of data. Now the HTTP client reads large data by chunks,
therefore the amount of consumed memory is proportional to the amount
of sent data.
This commit is contained in:
Serhiy Storchaka 2025-12-01 17:26:07 +02:00 committed by GitHub
parent d4fa70706c
commit 5a4c4a033a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 95 additions and 4 deletions

View file

@ -111,6 +111,11 @@
_MAXLINE = 65536 _MAXLINE = 65536
_MAXHEADERS = 100 _MAXHEADERS = 100
# Data larger than this will be read in chunks, to prevent extreme
# overallocation.
_MIN_READ_BUF_SIZE = 1 << 20
# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
# #
# VCHAR = %x21-7E # VCHAR = %x21-7E
@ -642,10 +647,25 @@ def _safe_read(self, amt):
reading. If the bytes are truly not available (due to EOF), then the reading. If the bytes are truly not available (due to EOF), then the
IncompleteRead exception can be used to detect the problem. IncompleteRead exception can be used to detect the problem.
""" """
data = self.fp.read(amt) cursize = min(amt, _MIN_READ_BUF_SIZE)
if len(data) < amt: data = self.fp.read(cursize)
raise IncompleteRead(data, amt-len(data)) if len(data) >= amt:
return data return data
if len(data) < cursize:
raise IncompleteRead(data, amt - len(data))
data = io.BytesIO(data)
data.seek(0, 2)
while True:
# This is a geometric increase in read size (never more than
# doubling out the current length of data per loop iteration).
delta = min(cursize, amt - cursize)
data.write(self.fp.read(delta))
if data.tell() >= amt:
return data.getvalue()
cursize += delta
if data.tell() < cursize:
raise IncompleteRead(data.getvalue(), amt - data.tell())
def _safe_readinto(self, b): def _safe_readinto(self, b):
"""Same as _safe_read, but for reading into a buffer.""" """Same as _safe_read, but for reading into a buffer."""

View file

@ -1511,6 +1511,72 @@ def run_server():
thread.join() thread.join()
self.assertEqual(result, b"proxied data\n") self.assertEqual(result, b"proxied data\n")
def test_large_content_length(self):
serv = socket.create_server((HOST, 0))
self.addCleanup(serv.close)
def run_server():
[conn, address] = serv.accept()
with conn:
while conn.recv(1024):
conn.sendall(
b"HTTP/1.1 200 Ok\r\n"
b"Content-Length: %d\r\n"
b"\r\n" % size)
conn.sendall(b'A' * (size//3))
conn.sendall(b'B' * (size - size//3))
thread = threading.Thread(target=run_server)
thread.start()
self.addCleanup(thread.join, 1.0)
conn = client.HTTPConnection(*serv.getsockname())
try:
for w in range(15, 27):
size = 1 << w
conn.request("GET", "/")
with conn.getresponse() as response:
self.assertEqual(len(response.read()), size)
finally:
conn.close()
thread.join(1.0)
def test_large_content_length_truncated(self):
serv = socket.create_server((HOST, 0))
self.addCleanup(serv.close)
def run_server():
while True:
[conn, address] = serv.accept()
with conn:
conn.recv(1024)
if not size:
break
conn.sendall(
b"HTTP/1.1 200 Ok\r\n"
b"Content-Length: %d\r\n"
b"\r\n"
b"Text" % size)
thread = threading.Thread(target=run_server)
thread.start()
self.addCleanup(thread.join, 1.0)
conn = client.HTTPConnection(*serv.getsockname())
try:
for w in range(18, 65):
size = 1 << w
conn.request("GET", "/")
with conn.getresponse() as response:
self.assertRaises(client.IncompleteRead, response.read)
conn.close()
finally:
conn.close()
size = 0
conn.request("GET", "/")
conn.close()
thread.join(1.0)
def test_putrequest_override_domain_validation(self): def test_putrequest_override_domain_validation(self):
""" """
It should be possible to override the default validation It should be possible to override the default validation

View file

@ -0,0 +1,5 @@
Fix a potential memory denial of service in the :mod:`http.client` module.
When connecting to a malicious server, it could cause
an arbitrary amount of memory to be allocated.
This could have led to symptoms including a :exc:`MemoryError`, swapping, out
of memory (OOM) killed processes or containers, or even system crashes.