| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | # RFC-822 message manipulation class. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # XXX This is only a very rough sketch of a full RFC-822 parser; | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | # in particular the tokenizing of addresses does not adhere to all the | 
					
						
							|  |  |  | # quoting rules. | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # Directions for use: | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # To create a Message object: first open a file, e.g.: | 
					
						
							|  |  |  | #   fp = open(file, 'r') | 
					
						
							|  |  |  | # (or use any other legal way of getting an open file object, e.g. use | 
					
						
							|  |  |  | # sys.stdin or call os.popen()). | 
					
						
							| 
									
										
										
										
											1993-12-17 15:25:27 +00:00
										 |  |  | # Then pass the open file object to the Message() constructor: | 
					
						
							|  |  |  | #   m = Message(fp) | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # To get the text of a particular header there are several methods: | 
					
						
							|  |  |  | #   str = m.getheader(name) | 
					
						
							|  |  |  | #   str = m.getrawheader(name) | 
					
						
							|  |  |  | # where name is the name of the header, e.g. 'Subject'. | 
					
						
							|  |  |  | # The difference is that getheader() strips the leading and trailing | 
					
						
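							|  |  |  | # whitespace, while getrawheader() doesn't.  getrawheader() retains | 
					
						
							|  |  |  | # embedded whitespace (including newlines) exactly as specified in | 
					
						
							|  |  |  | # the header; getheader() strips each continuation line and joins | 
					
						
							|  |  |  | # the pieces with "\n ".  getheader() returns the last header of that | 
					
						
							|  |  |  | # name, getrawheader() the first.  Both leave the case unchanged. | 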
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | # For addresses and address lists there are functions | 
					
						
							|  |  |  | #   realname, mailaddress = m.getaddr(name) and | 
					
						
							|  |  |  | #   list = m.getaddrlist(name) | 
					
						
							|  |  |  | # where the latter returns a list of (realname, mailaddr) tuples. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # There is also a method | 
					
						
							|  |  |  | #   time = m.getdate(name) | 
					
						
							|  |  |  | # which parses a Date-like field and returns a time-compatible tuple, | 
					
						
							|  |  |  | # i.e. a tuple such as returned by time.localtime() or accepted by | 
					
						
							|  |  |  | # time.mktime(). | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | # See the class definition for lower level access methods. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # There are also some utility functions here. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import regex | 
					
						
							|  |  |  | import string | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | import time | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
# Lines that Message.islast() accepts as the end of the header section:
# a bare newline, or CR LF as seen when reading from a socket.  Kept as
# a module-level tuple so that islast() is a single membership test.
_blanklines = ('\r\n', '\n')
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | class Message: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Initialize the class instance and read the headers. | 
					
						
							|  |  |  | 	 | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 	def __init__(self, fp, seekable = 1): | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		self.fp = fp | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		self.seekable = seekable | 
					
						
							|  |  |  | 		self.startofheaders = None | 
					
						
							|  |  |  | 		self.startofbody = None | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		# | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		if self.seekable: | 
					
						
							|  |  |  | 			try: | 
					
						
							|  |  |  | 				self.startofheaders = self.fp.tell() | 
					
						
							|  |  |  | 			except IOError: | 
					
						
							|  |  |  | 				self.seekable = 0 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		# | 
					
						
							|  |  |  | 		self.readheaders() | 
					
						
							|  |  |  | 		# | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		if self.seekable: | 
					
						
							|  |  |  | 			try: | 
					
						
							|  |  |  | 				self.startofbody = self.fp.tell() | 
					
						
							|  |  |  | 			except IOError: | 
					
						
							|  |  |  | 				self.seekable = 0 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Rewind the file to the start of the body (if seekable). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def rewindbody(self): | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		if not self.seekable: | 
					
						
							|  |  |  | 			raise IOError, "unseekable file" | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		self.fp.seek(self.startofbody) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Read header lines up to the entirely blank line that | 
					
						
							|  |  |  | 	# terminates them.  The (normally blank) line that ends the | 
					
						
							|  |  |  | 	# headers is skipped, but not included in the returned list. | 
					
						
							|  |  |  | 	# If a non-header line ends the headers, (which is an error), | 
					
						
							|  |  |  | 	# an attempt is made to backspace over it; it is never | 
					
						
							|  |  |  | 	# included in the returned list. | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	# The variable self.status is set to the empty string if all | 
					
						
							|  |  |  | 	# went well, otherwise it is an error message. | 
					
						
							|  |  |  | 	# The variable self.headers is a completely uninterpreted list | 
					
						
							|  |  |  | 	# of lines contained in the header (so printing them will | 
					
						
							|  |  |  | 	# reproduce the header exactly as it appears in the file). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def readheaders(self): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		self.dict = {} | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		self.unixfrom = '' | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		self.headers = list = [] | 
					
						
							|  |  |  | 		self.status = '' | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		headerseen = "" | 
					
						
							| 
									
										
										
										
											1995-06-13 11:19:48 +00:00
										 |  |  | 		firstline = 1 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 		while 1: | 
					
						
							|  |  |  | 			line = self.fp.readline() | 
					
						
							|  |  |  | 			if not line: | 
					
						
							|  |  |  | 				self.status = 'EOF in headers' | 
					
						
							|  |  |  | 				break | 
					
						
							| 
									
										
										
										
											1995-06-13 11:19:48 +00:00
										 |  |  | 			# Skip unix From name time lines | 
					
						
							| 
									
										
										
										
											1995-06-16 10:57:14 +00:00
										 |  |  | 			if firstline and line[:5] == 'From ': | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 				self.unixfrom = self.unixfrom + line | 
					
						
							| 
									
										
										
										
											1995-06-13 11:19:48 +00:00
										 |  |  | 			        continue | 
					
						
							|  |  |  | 			firstline = 0 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 			if self.islast(line): | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			elif headerseen and line[0] in ' \t': | 
					
						
							|  |  |  | 				# It's a continuation line. | 
					
						
							|  |  |  | 				list.append(line) | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 				x = (self.dict[headerseen] + "\n " + | 
					
						
							|  |  |  | 				     string.strip(line)) | 
					
						
							|  |  |  | 				self.dict[headerseen] = string.strip(x) | 
					
						
							|  |  |  | 			elif ':' in line: | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 				# It's a header line. | 
					
						
							|  |  |  | 				list.append(line) | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 				i = string.find(line, ':') | 
					
						
							|  |  |  | 				headerseen = string.lower(line[:i]) | 
					
						
							|  |  |  | 				self.dict[headerseen] = string.strip( | 
					
						
							|  |  |  | 					line[i+1:]) | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 			else: | 
					
						
							|  |  |  | 				# It's not a header line; stop here. | 
					
						
							|  |  |  | 				if not headerseen: | 
					
						
							|  |  |  | 					self.status = 'No headers' | 
					
						
							|  |  |  | 				else: | 
					
						
							|  |  |  | 					self.status = 'Bad header' | 
					
						
							|  |  |  | 				# Try to undo the read. | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 				if self.seekable: | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 					self.fp.seek(-len(line), 1) | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 				else: | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 					self.status = \ | 
					
						
							|  |  |  | 						self.status + '; bad seek' | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Method to determine whether a line is a legal end of | 
					
						
							|  |  |  | 	# RFC-822 headers.  You may override this method if your | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	# application wants to bend the rules, e.g. to strip trailing | 
					
						
							|  |  |  | 	# whitespace, or to recognise MH template separators | 
					
						
							|  |  |  | 	# ('--------').  For convenience (e.g. for code reading from | 
					
						
							|  |  |  | 	# sockets) a line consisting of \r\n also matches. | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def islast(self, line): | 
					
						
							| 
									
										
										
										
											1995-06-22 19:06:57 +00:00
										 |  |  | 		return line in _blanklines | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Look through the list of headers and find all lines matching | 
					
						
							|  |  |  | 	# a given header name (and their continuation lines). | 
					
						
							|  |  |  | 	# A list of the lines is returned, without interpretation. | 
					
						
							|  |  |  | 	# If the header does not occur, an empty list is returned. | 
					
						
							|  |  |  | 	# If the header occurs multiple times, all occurrences are | 
					
						
							|  |  |  | 	# returned.  Case is not important in the header name. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getallmatchingheaders(self, name): | 
					
						
							|  |  |  | 		name = string.lower(name) + ':' | 
					
						
							|  |  |  | 		n = len(name) | 
					
						
							|  |  |  | 		list = [] | 
					
						
							|  |  |  | 		hit = 0 | 
					
						
							|  |  |  | 		for line in self.headers: | 
					
						
							|  |  |  | 			if string.lower(line[:n]) == name: | 
					
						
							|  |  |  | 				hit = 1 | 
					
						
							|  |  |  | 			elif line[:1] not in string.whitespace: | 
					
						
							|  |  |  | 				hit = 0 | 
					
						
							|  |  |  | 			if hit: | 
					
						
							|  |  |  | 				list.append(line) | 
					
						
							|  |  |  | 		return list | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Similar, but return only the first matching header (and its | 
					
						
							|  |  |  | 	# continuation lines). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getfirstmatchingheader(self, name): | 
					
						
							|  |  |  | 		name = string.lower(name) + ':' | 
					
						
							|  |  |  | 		n = len(name) | 
					
						
							|  |  |  | 		list = [] | 
					
						
							|  |  |  | 		hit = 0 | 
					
						
							|  |  |  | 		for line in self.headers: | 
					
						
							| 
									
										
										
										
											1994-08-12 13:16:50 +00:00
										 |  |  | 			if hit: | 
					
						
							|  |  |  | 				if line[:1] not in string.whitespace: | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 					break | 
					
						
							| 
									
										
										
										
											1994-08-12 13:16:50 +00:00
										 |  |  | 			elif string.lower(line[:n]) == name: | 
					
						
							|  |  |  | 				hit = 1 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 			if hit: | 
					
						
							|  |  |  | 				list.append(line) | 
					
						
							|  |  |  | 		return list | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# A higher-level interface to getfirstmatchingheader(). | 
					
						
							|  |  |  | 	# Return a string containing the literal text of the header | 
					
						
							|  |  |  | 	# but with the keyword stripped.  All leading, trailing and | 
					
						
							|  |  |  | 	# embedded whitespace is kept in the string, however. | 
					
						
							|  |  |  | 	# Return None if the header does not occur. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getrawheader(self, name): | 
					
						
							|  |  |  | 		list = self.getfirstmatchingheader(name) | 
					
						
							|  |  |  | 		if not list: | 
					
						
							|  |  |  | 			return None | 
					
						
							|  |  |  | 		list[0] = list[0][len(name) + 1:] | 
					
						
							|  |  |  | 		return string.joinfields(list, '') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 	# The normal interface: return a stripped version of the | 
					
						
							|  |  |  | 	# header value with a name, or None if it doesn't exist.  This | 
					
						
							|  |  |  | 	# uses the dictionary version which finds the *last* such | 
					
						
							|  |  |  | 	# header. | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def getheader(self, name): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		try: | 
					
						
							|  |  |  | 			return self.dict[string.lower(name)] | 
					
						
							|  |  |  | 		except KeyError: | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 			return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	# Retrieve a single address from a header as a tuple, e.g. | 
					
						
							|  |  |  | 	# ('Guido van Rossum', 'guido@cwi.nl'). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getaddr(self, name): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		try: | 
					
						
							|  |  |  | 			data = self[name] | 
					
						
							|  |  |  | 		except KeyError: | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 			return None, None | 
					
						
							|  |  |  | 		return parseaddr(data) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Retrieve a list of addresses from a header, where each | 
					
						
							|  |  |  | 	# address is a tuple as returned by getaddr(). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getaddrlist(self, name): | 
					
						
							|  |  |  | 		# XXX This function is not really correct.  The split | 
					
						
							|  |  |  | 		# on ',' might fail in the case of commas within | 
					
						
							|  |  |  | 		# quoted strings. | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		try: | 
					
						
							|  |  |  | 			data = self[name] | 
					
						
							|  |  |  | 		except KeyError: | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 			return [] | 
					
						
							|  |  |  | 		data = string.splitfields(data, ',') | 
					
						
							|  |  |  | 		for i in range(len(data)): | 
					
						
							|  |  |  | 			data[i] = parseaddr(data[i]) | 
					
						
							|  |  |  | 		return data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	# Retrieve a date field from a header as a tuple compatible | 
					
						
							|  |  |  | 	# with time.mktime(). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getdate(self, name): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		try: | 
					
						
							|  |  |  | 			data = self[name] | 
					
						
							|  |  |  | 		except KeyError: | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 			return None | 
					
						
							|  |  |  | 		return parsedate(data) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | 	# Retrieve a date field from a header as a 10-tuple.   | 
					
						
							|  |  |  | 	# The first 9 elements make up a tuple compatible | 
					
						
							|  |  |  | 	# with time.mktime(), and the 10th is the offset | 
					
						
							|  |  |  | 	# of the poster's time zone from GMT/UTC. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def getdate_tz(self, name): | 
					
						
							|  |  |  | 		try: | 
					
						
							|  |  |  | 			data = self[name] | 
					
						
							|  |  |  | 		except KeyError: | 
					
						
							|  |  |  | 			return None | 
					
						
							|  |  |  | 		return parsedate_tz(data) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 	# Access as a dictionary (only finds *last* header of each type): | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	def __len__(self): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return len(self.dict) | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def __getitem__(self, name): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return self.dict[string.lower(name)] | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def has_key(self, name): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return self.dict.has_key(string.lower(name)) | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def keys(self): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return self.dict.keys() | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	def values(self): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return self.dict.values() | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	def items(self): | 
					
						
							| 
									
										
										
										
											1996-05-28 23:08:25 +00:00
										 |  |  | 		return self.dict.items() | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Utility functions | 
					
						
							|  |  |  | # ----------------- | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | # XXX Should fix these to be really conformant. | 
					
						
							|  |  |  | # XXX The inverses of the parse functions may also be useful. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1992-07-13 14:28:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Remove quotes from a string. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def unquote(str): | 
					
						
							|  |  |  | 	if len(str) > 1: | 
					
						
							|  |  |  | 		if str[0] == '"' and str[-1:] == '"': | 
					
						
							|  |  |  | 			return str[1:-1] | 
					
						
							|  |  |  | 		if str[0] == '<' and str[-1:] == '>': | 
					
						
							|  |  |  | 			return str[1:-1] | 
					
						
							|  |  |  | 	return str | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Parse an address into (name, address) tuple | 
					
						
							|  |  |  | 
 | 
					
						
def parseaddr(address):
	# Scan the address one character at a time and return a tuple
	# (comment, email), each stripped of surrounding whitespace.
	# Text inside <...> goes to email; text inside (...) and text
	# outside an angle-bracketed address goes to comment.  If no
	# '<' is ever seen, text outside parentheses goes to email.
	#
	# The order of the tests in the loop matters: backslash and
	# double-quote handling come first, so quoted text is never
	# examined for whitespace, parentheses, brackets or '#'.
	import string
	str = ''		# text collected since the last flush
	email = ''
	comment = ''
	backslash = 0		# previous character was a backslash
	dquote = 0		# inside a double-quoted string
	space = 0		# whitespace seen, not yet emitted
	paren = 0		# nesting depth of (...)
	bracket = 0		# inside <...>
	seen_bracket = 0	# a '<' has occurred at some point
	for c in address:
		if backslash:
			# Escaped character: keep it, drop the backslash.
			str = str + c
			backslash = 0
			continue
		if c == '\\':
			backslash = 1
			continue
		if dquote:
			# The quotes themselves are dropped; everything
			# between them is kept as is.
			if c == '"':
				dquote = 0
			else:
				str = str + c
			continue
		if c == '"':
			dquote = 1
			continue
		if c in string.whitespace:
			# Only remember that whitespace occurred ...
			space = 1
			continue
		if space:
			# ... and emit a single blank before the next
			# non-blank character, whatever it is.
			str = str + ' '
			space = 0
		if paren:
			if c == '(':
				# Nested parenthesis: kept in the text.
				paren = paren + 1
				str = str + c
				continue
			if c == ')':
				paren = paren - 1
				if paren == 0:
					# Outermost ')': flush to comment.
					comment = comment + str
					str = ''
					continue
				# An inner ')' falls through and is
				# appended to str at the loop bottom.
		if c == '(':
			# Outermost '(' (paren was 0).  Pending text is
			# flushed to email unless a <...> address has
			# already been closed, in which case it stays in
			# str and ends up in the comment.
			paren = paren + 1
			if bracket:
				email = email + str
				str = ''
			elif not seen_bracket:
				email = email + str
				str = ''
			continue
		if bracket:
			if c == '>':
				bracket = 0
				email = email + str
				str = ''
				continue
		if c == '<':
			# Text before '<' is the comment part; anything
			# collected as email so far is discarded.
			bracket = 1
			seen_bracket = 1
			comment = comment + str
			str = ''
			email = ''
			continue
		if c == '#' and not bracket and not paren:
			# rest is comment
			break
		str = str + c
	# Flush whatever is left over after the last character.
	if str:
		if seen_bracket:
			if bracket:
				# Unterminated '<': the rest is the email.
				email = str
			else:
				comment = comment + str
		else:
			if paren:
				# Unterminated '(': the rest is comment.
				comment = comment + str
			else:
				email = email + str
	return string.strip(comment), string.strip(email)
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Parse a date field | 
					
						
							|  |  |  | 
 | 
					
						
# Month abbreviations in calendar order; parsedate_tz() turns a month
# name into its number with _monthnames.index(mm)+1.
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
	  'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# NOTE(review): the values look like signed HHMM offsets from UTC
# (e.g. -500 for -05:00) -- confirm against their use in parsedate_tz().

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
	      'AST': -400, 'ADT': -300,  # Atlantic
	      'EST': -500, 'EDT': -400,  # Eastern
	      'CST': -600, 'CDT':-500,   # Central
	      'MST':-700, 'MDT':-600,    # Mountain
	      'PST':-800, 'PDT':-700     # Pacific
	     }
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parsedate_tz(data): | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	data = string.split(data) | 
					
						
							|  |  |  | 	if data[0][-1] == ',': | 
					
						
							|  |  |  | 		# There's a dayname here. Skip it | 
					
						
							|  |  |  | 		del data[0] | 
					
						
							| 
									
										
										
										
											1994-09-09 11:10:15 +00:00
										 |  |  | 	if len(data) == 4: | 
					
						
							|  |  |  | 		s = data[3] | 
					
						
							|  |  |  | 		i = string.find(s, '+') | 
					
						
							|  |  |  | 		if i > 0: | 
					
						
							|  |  |  | 			data[3:] = [s[:i], s[i+1:]] | 
					
						
							|  |  |  | 		else: | 
					
						
							|  |  |  | 			data.append('') # Dummy tz | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	if len(data) < 5: | 
					
						
							|  |  |  | 		return None | 
					
						
							|  |  |  | 	data = data[:5] | 
					
						
							|  |  |  | 	[dd, mm, yy, tm, tz] = data | 
					
						
							|  |  |  | 	if not mm in _monthnames: | 
					
						
							|  |  |  | 		return None | 
					
						
							|  |  |  | 	mm = _monthnames.index(mm)+1 | 
					
						
							|  |  |  | 	tm = string.splitfields(tm, ':') | 
					
						
							|  |  |  | 	if len(tm) == 2: | 
					
						
							|  |  |  | 		[thh, tmm] = tm | 
					
						
							|  |  |  | 		tss = '0' | 
					
						
							|  |  |  | 	else: | 
					
						
							|  |  |  | 		[thh, tmm, tss] = tm | 
					
						
							|  |  |  | 	try: | 
					
						
							|  |  |  | 		yy = string.atoi(yy) | 
					
						
							|  |  |  | 		dd = string.atoi(dd) | 
					
						
							|  |  |  | 		thh = string.atoi(thh) | 
					
						
							|  |  |  | 		tmm = string.atoi(tmm) | 
					
						
							|  |  |  | 		tss = string.atoi(tss) | 
					
						
							|  |  |  | 	except string.atoi_error: | 
					
						
							|  |  |  | 		return None | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | 	tzoffset=0 | 
					
						
							|  |  |  | 	tz=string.upper(tz) | 
					
						
							|  |  |  | 	if _timezones.has_key(tz): | 
					
						
							|  |  |  | 		tzoffset=_timezones[tz] | 
					
						
							|  |  |  | 	else: | 
					
						
							|  |  |  | 		try:  | 
					
						
							|  |  |  | 			tzoffset=string.atoi(tz) | 
					
						
							|  |  |  | 		except string.atoi_error:  | 
					
						
							|  |  |  | 			pass | 
					
						
							|  |  |  | 	# Convert a timezone offset into seconds ; -0500 -> -18000 | 
					
						
							|  |  |  | 	if tzoffset<0: tzsign=-1 | 
					
						
							|  |  |  | 	else: tzsign=1 | 
					
						
							|  |  |  | 	tzoffset=tzoffset*tzsign | 
					
						
							|  |  |  | 	tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) | 
					
						
							|  |  |  | 	tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset) | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	return tuple | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | def parsedate(data): | 
					
						
							|  |  |  | 	t=parsedate_tz(data) | 
					
						
							|  |  |  | 	if type(t)==type( () ): | 
					
						
							|  |  |  | 		return t[:9] | 
					
						
							|  |  |  | 	else: return t     | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # When used as script, run a small test program. | 
					
						
# The first command line argument, if given, is a filename containing
# one message in RFC-822 format; otherwise $HOME/Mail/inbox/1 is read.
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							| 
									
										
										
										
											1996-07-30 16:29:16 +00:00
										 |  |  | 	import sys, os | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | 	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1') | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	if sys.argv[1:]: file = sys.argv[1] | 
					
						
							|  |  |  | 	f = open(file, 'r') | 
					
						
							|  |  |  | 	m = Message(f) | 
					
						
							|  |  |  | 	print 'From:', m.getaddr('from') | 
					
						
							|  |  |  | 	print 'To:', m.getaddrlist('to') | 
					
						
							|  |  |  | 	print 'Subject:', m.getheader('subject') | 
					
						
							|  |  |  | 	print 'Date:', m.getheader('date') | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | 	date = m.getdate_tz('date') | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	if date: | 
					
						
							| 
									
										
										
										
											1996-11-20 22:12:26 +00:00
										 |  |  | 		print 'ParsedDate:', time.asctime(date[:-1]), | 
					
						
							|  |  |  | 		hhmmss = date[-1] | 
					
						
							|  |  |  | 		hhmm, ss = divmod(hhmmss, 60) | 
					
						
							|  |  |  | 		hh, mm = divmod(hhmm, 60) | 
					
						
							|  |  |  | 		print "%+03d%02d" % (hh, mm), | 
					
						
							|  |  |  | 		if ss: print ".%02d" % ss, | 
					
						
							|  |  |  | 		print | 
					
						
							| 
									
										
										
										
											1994-08-01 11:34:53 +00:00
										 |  |  | 	else: | 
					
						
							|  |  |  | 		print 'ParsedDate:', None | 
					
						
							|  |  |  | 	m.rewindbody() | 
					
						
							|  |  |  | 	n = 0 | 
					
						
							|  |  |  | 	while f.readline(): | 
					
						
							|  |  |  | 		n = n + 1 | 
					
						
							|  |  |  | 	print 'Lines:', n | 
					
						
							|  |  |  | 	print '-'*70 | 
					
						
							|  |  |  | 	print 'len =', len(m) | 
					
						
							|  |  |  | 	if m.has_key('Date'): print 'Date =', m['Date'] | 
					
						
							|  |  |  | 	if m.has_key('X-Nonsense'): pass | 
					
						
							|  |  |  | 	print 'keys =', m.keys() | 
					
						
							|  |  |  | 	print 'values =', m.values() | 
					
						
							|  |  |  | 	print 'items =', m.items() | 
					
						
							|  |  |  | 	 |