mirror of
https://github.com/python/cpython.git
synced 2025-12-31 04:23:37 +00:00
gh-79986: Add parsing for References/In-Reply-To email headers (#137201)
This is a follow-up to 46d88a1131 (#13397),
which added parsing for Message-ID. Similar handling is needed for the
other two identification headers.
This commit is contained in:
parent
4eab90f4f3
commit
79aa43a979
5 changed files with 137 additions and 0 deletions
|
|
@ -878,6 +878,12 @@ class MessageID(MsgID):
|
|||
class InvalidMessageID(MessageID):
    """MessageID subclass whose token type is 'invalid-message-id'."""

    token_type = 'invalid-message-id'
|
||||
|
||||
class MessageIDList(TokenList):
    """Token list for a sequence of message ids ('message-id-list')."""

    token_type = 'message-id-list'

    @property
    def message_ids(self):
        """Return the contained tokens whose token_type is 'msg-id'.

        Tokens of any other type are left out of the result.
        """
        found = []
        for child in self:
            if child.token_type == 'msg-id':
                found.append(child)
        return found
|
||||
|
||||
class Header(TokenList):
|
||||
token_type = 'header'
|
||||
|
|
@ -2175,6 +2181,32 @@ def parse_message_id(value):
|
|||
|
||||
return message_id
|
||||
|
||||
def parse_message_ids(value):
    """in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
       references      =   "References:" 1*msg-id CRLF

    Consume value piece by piece and return a MessageIDList.

    A leading comma is registered as an InvalidHeaderDefect and stands in
    the result as a single-space WhiteSpaceTerminal.  Anything else is
    handed to get_msg_id; if that raises HeaderParseError, what is left of
    value is passed to get_unstructured, wrapped in an InvalidMessageID, a
    second kind of InvalidHeaderDefect is registered, and parsing stops.
    """
    id_list = MessageIDList()
    while value:
        if value[0] == ',':
            # Commas are not a legal separator between msg-ids, yet real
            # world mail uses them often enough that we tolerate them.
            comma_defect = errors.InvalidHeaderDefect("comma in msg-id list")
            id_list.defects.append(comma_defect)
            replacement = WhiteSpaceTerminal(' ', 'invalid-comma-replacement')
            id_list.append(replacement)
            value = value[1:]
        else:
            try:
                msg_id, value = get_msg_id(value)
                id_list.append(msg_id)
            except errors.HeaderParseError as exc:
                leftover = get_unstructured(value)
                id_list.append(InvalidMessageID(leftover))
                description = "Invalid msg-id: {!r}".format(exc)
                id_list.defects.append(errors.InvalidHeaderDefect(description))
                # Nothing after an unparsable msg-id is examined further.
                break
    return id_list
|
||||
|
||||
#
|
||||
# XXX: As I begin to add additional header parsers, I'm realizing we probably
|
||||
# have two levels of parser routines: the get_XXX methods that get a token in
|
||||
|
|
|
|||
|
|
@ -534,6 +534,18 @@ def parse(cls, value, kwds):
|
|||
kwds['defects'].extend(parse_tree.all_defects)
|
||||
|
||||
|
||||
class ReferencesHeader:
    """Header class whose value is parsed by parser.parse_message_ids."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_ids)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and record the results in kwds.

        Stores the parse tree under 'parse_tree', its str() under
        'decoded', and extends the existing kwds['defects'] list with the
        tree's all_defects.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
|
||||
|
||||
|
||||
# The header factory #
|
||||
|
||||
_default_header_map = {
|
||||
|
|
@ -557,6 +569,8 @@ def parse(cls, value, kwds):
|
|||
'content-disposition': ContentDispositionHeader,
|
||||
'content-transfer-encoding': ContentTransferEncodingHeader,
|
||||
'message-id': MessageIDHeader,
|
||||
'in-reply-to': ReferencesHeader,
|
||||
'references': ReferencesHeader,
|
||||
}
|
||||
|
||||
class HeaderRegistry:
|
||||
|
|
|
|||
|
|
@ -2867,6 +2867,81 @@ def test_get_msg_id_ws_only_local(self):
|
|||
)
|
||||
self.assertEqual(msg_id.token_type, 'msg-id')
|
||||
|
||||
def test_parse_message_ids_valid(self):
    # Two space-separated msg-ids: all three string arguments are the
    # same and no defects are expected.
    source = "<foo@bar> <bar@foo>"
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, source, [])
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_empty(self):
    # Whitespace-only input: one InvalidHeaderDefect is expected.
    source = " "
    expected_defects = [errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, source, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_comment(self):
    # A msg-id followed by a comment: the last string argument (and the
    # value of the first msg-id) has the comment reduced to one space.
    source = "<foo@bar> (foo's message from \"bar\")"
    without_comment = "<foo@bar> "
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, without_comment, [])
    first = parsed.message_ids[0]
    self.assertEqual(first.value, '<foo@bar> ')
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_no_sep(self):
    # Two msg-ids with nothing between them: both are found and no
    # defects are expected.
    source = "<foo@bar><bar@foo>"
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, source, [])
    ids = parsed.message_ids
    self.assertEqual(ids[0].value, '<foo@bar>')
    self.assertEqual(ids[1].value, '<bar@foo>')
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_comma_sep(self):
    # A comma between msg-ids yields one InvalidHeaderDefect, and a
    # space appears in its place in the expected strings.
    source = "<foo@bar>,<bar@foo>"
    with_space = "<foo@bar> <bar@foo>"
    expected_defects = [errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids,
        source, with_space, with_space, expected_defects)
    ids = parsed.message_ids
    self.assertEqual(ids[0].value, '<foo@bar>')
    self.assertEqual(ids[1].value, '<bar@foo>')
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_invalid_id(self):
    # Angle brackets around something that is not a msg-id: two
    # InvalidHeaderDefect entries are expected.
    source = "<Date: Wed, 08 Jun 2002 09:78:58 +0600>"
    expected_defects = [errors.InvalidHeaderDefect,
                        errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, source, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
def test_parse_message_ids_broken_ang(self):
    # A valid msg-id followed by text with a stray '>': a single
    # InvalidHeaderDefect is expected.
    source = "<foo@bar> >bar@foo"
    expected_defects = [errors.InvalidHeaderDefect]
    parsed = self._test_parse_x(
        parser.parse_message_ids, source, source, source, expected_defects)
    self.assertEqual(parsed.token_type, 'message-id-list')
|
||||
|
||||
|
||||
|
||||
@parameterize
|
||||
|
|
|
|||
|
|
@ -1821,5 +1821,18 @@ def test_message_id_header_is_not_folded(self):
|
|||
h.fold(policy=policy.default.clone(max_line_length=20)),
|
||||
'Message-ID:\n <ईमेलfromMessage@wők.com>\n')
|
||||
|
||||
def test_fold_references(self):
    # Fold a References header holding two msg-ids with a
    # max_line_length of 20 and compare against the expected text, in
    # which each msg-id stays in one piece.
    header = self.make_header(
        'References',
        '<referenceid1thatislongerthan@maxlinelength.com> '
        '<referenceid2thatislongerthan@maxlinelength.com>'
    )
    narrow_policy = policy.default.clone(max_line_length=20)
    folded = header.fold(policy=narrow_policy)
    expected = (
        'References: '
        '<referenceid1thatislongerthan@maxlinelength.com>\n'
        ' <referenceid2thatislongerthan@maxlinelength.com>\n'
    )
    self.assertEqual(folded, expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
Add parsing for ``References`` and ``In-Reply-To`` headers to the :mod:`email`
|
||||
library, so that the header content is parsed as a list of message ID tokens. This
|
||||
prevents them from being folded incorrectly.
|
||||
Loading…
Add table
Add a link
Reference in a new issue