gh-148192: Fix Generator._make_boundary behavior with CRLF line endings. (#148193)

The Generator._make_boundary regex failed to match boundary lines when the text used CRLF line endings: under re.MULTILINE, `$` matches only immediately before \n (or at the end of the string), so the \r left at the end of each line prevented a match.
This commit is contained in:
Henry Jones 2026-04-15 01:10:08 +12:00 committed by GitHub
parent 74a4f8c1d0
commit 4af46b4ab5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 41 additions and 1 deletions

View file

@ -392,7 +392,7 @@ def _make_boundary(cls, text=None):
b = boundary
counter = 0
while True:
cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
cre = cls._compile_re('^--' + re.escape(b) + '(--)?\r?$', re.MULTILINE)
if not cre.search(text):
break
b = boundary + '.' + str(counter)

View file

@ -1,13 +1,20 @@
import io
import textwrap
import unittest
import random
import sys
from email import message_from_string, message_from_bytes
from email.message import EmailMessage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.generator import Generator, BytesGenerator
import email.generator
from email.headerregistry import Address
from email import policy
import email.errors
from test.test_email import TestEmailBase, parameterize
import test.support
@parameterize
@ -288,6 +295,36 @@ def test_keep_long_encoded_newlines(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), self.typ(expected))
def _test_boundary_detection(self, linesep):
    """Check that a boundary-like line inside a part forces a new boundary.

    A multipart message is built whose text part contains a line made of
    ``--`` followed by the boundary the generator is about to choose.
    The message is flattened with *linesep* as the line separator, and
    the boundary finally stored on the message must be the first choice
    with ``.0`` appended.
    """
    # Draw one value up front and make random.randrange keep returning
    # it, so the boundary computed here is the same one flatten() will
    # try first.
    pinned = random.randrange(sys.maxsize)
    with test.support.swap_attr(
        random, "randrange", lambda *args, **kwargs: pinned
    ):
        first_choice = self.genclass._make_boundary(text=None)
        colliding_text = (
            "this goes before the boundary\n--"
            + first_choice
            + "\nthis goes after\n"
        )
        container = MIMEMultipart()
        container.attach(MIMEText(colliding_text))
        sink = self.ioclass()
        self.genclass(sink).flatten(container, linesep=linesep)
        # The part already contains the first-choice boundary as a line
        # of its own, so the generator has to fall back to the ".0"
        # variant to keep the boundary unique.
        self.assertEqual(container.get_boundary(), first_choice + ".0")
def test_lf_boundary_detection(self):
    """Run the boundary-collision check with LF as the line separator."""
    self._test_boundary_detection(linesep="\n")
def test_crlf_boundary_detection(self):
    """Run the boundary-collision check with CRLF as the line separator."""
    self._test_boundary_detection(linesep="\r\n")
class TestGenerator(TestGeneratorBase, TestEmailBase):

View file

@ -0,0 +1,3 @@
``email.generator.Generator._make_boundary`` could fail to detect a duplicate
boundary string if ``linesep`` was not ``\n``. It now correctly detects boundary
strings when ``linesep`` is ``\r\n`` as well.