diff --git a/Lib/email/header.py b/Lib/email/header.py index 2e687b7a6f1..3250d367edd 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -283,7 +283,12 @@ def append(self, s, charset=None, errors='strict'): # character set, otherwise an early error is thrown. output_charset = charset.output_codec or 'us-ascii' if output_charset != _charset.UNKNOWN8BIT: - s.encode(output_charset, errors) + try: + s.encode(output_charset, errors) + except UnicodeEncodeError: + if output_charset!='us-ascii': + raise + charset = UTF8 self._chunks.append((s, charset)) def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 121c939561c..a4d39ab7793 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -604,6 +604,19 @@ def test_embeded_header_via_string_rejected(self): msg['Dummy'] = 'dummy\nX-Injected-Header: test' self.assertRaises(errors.HeaderParseError, msg.as_string) + def test_unicode_header_defaults_to_utf8_encoding(self): + # Issue 14291 + m = MIMEText('abc\n') + m['Subject'] = 'É test' + self.assertEqual(str(m),textwrap.dedent("""\ + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Subject: =?utf-8?q?=C3=89_test?= + + abc + """)) + # Test the email.encoders module class TestEncoders(unittest.TestCase): @@ -1045,9 +1058,13 @@ def test_long_8bit_header_no_charset(self): 'f\xfcr Offshore-Windkraftprojekte ' '') msg['Reply-To'] = header_string - self.assertRaises(UnicodeEncodeError, msg.as_string) + eq(msg.as_string(maxheaderlen=78), """\ +Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= + =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= + +""") msg = Message() - msg['Reply-To'] = Header(header_string, 'utf-8', + msg['Reply-To'] = Header(header_string, header_name='Reply-To') eq(msg.as_string(maxheaderlen=78), """\ Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index 212ceb94fc7..54963a903ab 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -111,10 +111,10 @@ def test_add_nonascii_string_header_raises(self): self.assertMailboxEmpty() def test_add_that_raises_leaves_mailbox_empty(self): - # XXX This test will start failing when Message learns to handle - # non-ASCII string headers, and a different internal failure will - # need to be found or manufactured. - with self.assertRaises(ValueError): + def raiser(*args, **kw): + raise Exception("a fake error") + support.patch(self, email.generator.BytesGenerator, 'flatten', raiser) + with self.assertRaises(Exception): self._box.add(email.message_from_string("From: Alphöso")) self.assertEqual(len(self._box), 0) self._box.close() diff --git a/Misc/ACKS b/Misc/ACKS index a11d4ebdd87..eeb61e08e0d 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -470,6 +470,7 @@ Gerhard Häring Fredrik Håård Catalin Iacob Mihai Ibanescu +Ali Ikinci Lars Immisch Bobby Impollonia Meador Inge diff --git a/Misc/NEWS b/Misc/NEWS index 9b4d2c2ef95..0d6cdbb13d0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -24,6 +24,9 @@ Core and Builtins Library ------- +- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers + instead of raising an error. This fixes a regression relative to 2.7. + - Issue #989712: Support using Tk without a mainloop. - Issue #5219: Prevent event handler cascade in IDLE.