mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	
		
			
	
	
		
			442 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			442 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | # Copyright (C) 2002 Python Software Foundation | ||
|  | 
 | ||
|  | """Email address parsing code.
 | ||
|  | 
 | ||
|  | Lifted directly from rfc822.py.  This should eventually be rewritten. | ||
|  | """
 | ||
|  | 
 | ||
|  | import time | ||
|  | 
 | ||
# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The result is (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset).  `tzoffset' is the zone's offset from UTC in seconds
    (negative for zones west of UTC), or None when the string carries no
    usable zone.  Zone names are looked up in `_timezones'; of the
    military zones only `Z' is recognized.  Anything else is tried as a
    numeric (+/-HHMM) offset.

    Returns None when the string cannot be parsed.
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the preceding field ("19:12:08-0500").
        # Split at the sign and keep it, so negative offsets survive.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    dd, mm, yy, tm, tz = data[:5]
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the month comes before the day ("Nov 20 ...").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    # The table holds 12 abbreviations followed by 12 full names.
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm = mm - 12
    # endswith()/slicing instead of [-1]/[0] so that empty sub-fields
    # (e.g. from "-Nov-94") fall through to the int() check below rather
    # than raising IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    if yy.find(':') > 0:
        # Time and year are in the opposite order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # Year and zone are in the opposite order.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        thh, tmm = tm
        tss = '0'
    elif len(tm) == 3:
        thh, tmm, tss = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name or dummy zone: leave the offset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the result an integer on every Python version.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
|  | 
 | ||
|  | 
 | ||
def parsedate(data):
    """Convert a time string to a time tuple.

    Parses `data' with parsedate_tz() and returns the first nine items of
    its result, i.e. the tuple without the trailing timezone offset.  When
    parsedate_tz() does not return a tuple (it returns None for input it
    cannot parse), that value is passed through unchanged.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
|  | 
 | ||
|  | 
 | ||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the zone offset in seconds, or None.  With no zone
    information the fields are interpreted as local time, letting
    time.mktime() decide whether DST applies.  Otherwise the fields are
    converted as if they were UTC and the offset is subtracted.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # calendar.timegm() does the conversion without consulting the local
    # zone.  Going through time.mktime() and correcting by time.timezone
    # is wrong wherever the local zone's offset at that date differs
    # from its current standard offset.
    import calendar
    return calendar.timegm(data) - data[9]
|  | 
 | ||
|  | 
 | ||
def quote(str):
    """Return `str' with backslashes and double quotes backslash-escaped.

    Backslashes are doubled first, then every double quote is prefixed
    with a backslash.  No surrounding quote characters are added.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
|  | 
 | ||
|  | 
 | ||
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (`pos') into the header text (`field');
    every get*() method consumes input starting at the cursor.  Comments
    met along the way are collected in `commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use the AddressList subclass instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comment met on the way is
        appended to `commentlist'.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (comment-or-phrase, address) pair.  Empty list elements (as in
        "a@b, , c@d") contribute nothing and do not stop the parse.
        """
        result = []
        # Iterate rather than recurse, so long lists cannot hit the
        # recursion limit.  getaddress() always consumes at least one
        # character while input remains, so the loop terminates.
        while self.pos < len(self.field):
            result.extend(self.getaddress())
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (comment-or-phrase, address) pairs: one pair
        for a single mailbox, several for a group, and an empty list when
        nothing usable was found.  A following comma is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot, not alias: getphraselist() appends to commentlist in
        # place, and those comments must not be counted twice if we
        # rewind and re-parse below.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group: "phrase : address, address ;"
            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return None

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a "@domain" route element; discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                # End of the route; the addr-spec follows.
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addr-spec,
                # presumably the closing '>'.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec.

        Returns the local part and, if an `@' follows, `@' plus the domain.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        Returns the fragment's text without the delimiters and with
        backslash escapes resolved.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = 0
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                escaped = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the
                # nested comment; advancing again would drop a character.
                continue
            elif self.field[self.pos] == '\\':
                escaped = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character in
        `atomends' and returns them.
        """
        start = self.pos
        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            self.pos += 1
        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments are moved to `commentlist'.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
|  | 
 | ||
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (name, address) pairs are kept in `addresslist'.  The
    +, -, += and -= operators treat two AddressLists as sets of pairs.
    """
    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        # Format each pair inline: '"name" <address>' when a name is
        # present, else the bare address (the form produced by
        # rfc822.dump_address_pair).  This module defines no
        # dump_address_pair helper, so calling one raised NameError.
        parts = []
        for name, address in self.addresslist:
            if name:
                parts.append('"' + name + '" <' + address + '>')
            else:
                parts.append(address)
        return ", ".join(parts)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place.  Build the surviving entries first and
        # then assign through a slice: this drops every occurrence of a
        # removed pair (matching __sub__), is safe when `other' is `self',
        # and keeps the same list object.
        self.addresslist[:] = [x for x in self.addresslist
                               if x not in other.addresslist]
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]