AppPkg/Applications/Python/Python-2.7.2/Lib/email/_parseaddr.py

   1 # Copyright (C) 2002-2007 Python Software Foundation
   2 # Contact: email-sig@python.org
   3
   4 """Email address parsing code.
   5
   6 Lifted directly from rfc822.py.  This should eventually be rewritten.
   7 """
   8
   9 __all__ = [
  10     'mktime_tz',
  11     'parsedate',
  12     'parsedate_tz',
  13     'quote',
  14     ]
  15
import calendar
import time
  17
  18 SPACE = ' '
  19 EMPTYSTRING = ''
  20 COMMASPACE = ', '
  21
  22 # Parse a date field
  23 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
  24                'aug', 'sep', 'oct', 'nov', 'dec',
  25                'january', 'february', 'march', 'april', 'may', 'june', 'july',
  26                'august', 'september', 'october', 'november', 'december']
  27
  28 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
  29
  30 # The timezone table does not include the military time zones defined
  31 # in RFC822, other than Z.  According to RFC1123, the description in
  32 # RFC822 gets the signs wrong, so we can't rely on any such time
  33 # zones.  RFC1123 recommends that numeric timezone indicators be used
  34 # instead of timezone names.
  35
  36 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
  37               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
  38               'EST': -500, 'EDT': -400,  # Eastern
  39               'CST': -600, 'CDT': -500,  # Central
  40               'MST': -700, 'MDT': -600,  # Mountain
  41               'PST': -800, 'PDT': -700   # Pacific
  42               }
  43
  44
  45 def parsedate_tz(data):
  46     """Convert a date string to a time tuple.
  47
  48     Accounts for military timezones.
  49     """
  50     data = data.split()
  51     # The FWS after the comma after the day-of-week is optional, so search and
  52     # adjust for this.
  53     if data[0].endswith(',') or data[0].lower() in _daynames:
  54         # There's a dayname here. Skip it
  55         del data[0]
  56     else:
  57         i = data[0].rfind(',')
  58         if i >= 0:
  59             data[0] = data[0][i+1:]
  60     if len(data) == 3: # RFC 850 date, deprecated
  61         stuff = data[0].split('-')
  62         if len(stuff) == 3:
  63             data = stuff + data[1:]
  64     if len(data) == 4:
  65         s = data[3]
  66         i = s.find('+')
  67         if i > 0:
  68             data[3:] = [s[:i], s[i+1:]]
  69         else:
  70             data.append('') # Dummy tz
  71     if len(data) < 5:
  72         return None
  73     data = data[:5]
  74     [dd, mm, yy, tm, tz] = data
  75     mm = mm.lower()
  76     if mm not in _monthnames:
  77         dd, mm = mm, dd.lower()
  78         if mm not in _monthnames:
  79             return None
  80     mm = _monthnames.index(mm) + 1
  81     if mm > 12:
  82         mm -= 12
  83     if dd[-1] == ',':
  84         dd = dd[:-1]
  85     i = yy.find(':')
  86     if i > 0:
  87         yy, tm = tm, yy
  88     if yy[-1] == ',':
  89         yy = yy[:-1]
  90     if not yy[0].isdigit():
  91         yy, tz = tz, yy
  92     if tm[-1] == ',':
  93         tm = tm[:-1]
  94     tm = tm.split(':')
  95     if len(tm) == 2:
  96         [thh, tmm] = tm
  97         tss = '0'
  98     elif len(tm) == 3:
  99         [thh, tmm, tss] = tm
 100     else:
 101         return None
 102     try:
 103         yy = int(yy)
 104         dd = int(dd)
 105         thh = int(thh)
 106         tmm = int(tmm)
 107         tss = int(tss)
 108     except ValueError:
 109         return None
 110     # Check for a yy specified in two-digit format, then convert it to the
 111     # appropriate four-digit format, according to the POSIX standard. RFC 822
 112     # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
 113     # mandates a 4-digit yy. For more information, see the documentation for
 114     # the time module.
 115     if yy < 100:
 116         # The year is between 1969 and 1999 (inclusive).
 117         if yy > 68:
 118             yy += 1900
 119         # The year is between 2000 and 2068 (inclusive).
 120         else:
 121             yy += 2000
 122     tzoffset = None
 123     tz = tz.upper()
 124     if tz in _timezones:
 125         tzoffset = _timezones[tz]
 126     else:
 127         try:
 128             tzoffset = int(tz)
 129         except ValueError:
 130             pass
 131     # Convert a timezone offset into seconds ; -0500 -> -18000
 132     if tzoffset:
 133         if tzoffset < 0:
 134             tzsign = -1
 135             tzoffset = -tzoffset
 136         else:
 137             tzsign = 1
 138         tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
 139     # Daylight Saving Time flag is set to -1, since DST is unknown.
 140     return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
 141
 142
 143 def parsedate(data):
 144     """Convert a time string to a time tuple."""
 145     t = parsedate_tz(data)
 146     if isinstance(t, tuple):
 147         return t[:9]
 148     else:
 149         return t
 150
 151
 152 def mktime_tz(data):
 153     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
 154     if data[9] is None:
 155         # No zone info, so localtime is better assumption than GMT
 156         return time.mktime(data[:8] + (-1,))
 157     else:
 158         t = time.mktime(data[:8] + (0,))
 159         return t - data[9] - time.timezone
 160
 161
 162 def quote(str):
 163     """Prepare string to be used in a quoted string.
 164
 165     Turns backslash and double quote characters into quoted pairs.  These
 166     are the only characters that need to be quoted inside a quoted string.
 167     Does not add the surrounding double quotes.
 168     """
 169     return str.replace('\\', '\\\\').replace('"', '\\"')
 170
 171
 172 class AddrlistClass:
 173     """Address parser class by Ben Escoto.
 174
 175     To understand what this class does, it helps to have a copy of RFC 2822 in
 176     front of you.
 177
 178     Note: this class interface is deprecated and may be removed in the future.
 179     Use rfc822.AddressList instead.
 180     """
 181
 182     def __init__(self, field):
 183         """Initialize a new instance.
 184
 185         `field' is an unparsed address header field, containing
 186         one or more addresses.
 187         """
 188         self.specials = '()<>@,:;.\"[]'
 189         self.pos = 0
 190         self.LWS = ' \t'
 191         self.CR = '\r\n'
 192         self.FWS = self.LWS + self.CR
 193         self.atomends = self.specials + self.LWS + self.CR
 194         # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
 195         # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
 196         # syntax, so allow dots in phrases.
 197         self.phraseends = self.atomends.replace('.', '')
 198         self.field = field
 199         self.commentlist = []
 200
 201     def gotonext(self):
 202         """Parse up to the start of the next address."""
 203         while self.pos < len(self.field):
 204             if self.field[self.pos] in self.LWS + '\n\r':
 205                 self.pos += 1
 206             elif self.field[self.pos] == '(':
 207                 self.commentlist.append(self.getcomment())
 208             else:
 209                 break
 210
 211     def getaddrlist(self):
 212         """Parse all addresses.
 213
 214         Returns a list containing all of the addresses.
 215         """
 216         result = []
 217         while self.pos < len(self.field):
 218             ad = self.getaddress()
 219             if ad:
 220                 result += ad
 221             else:
 222                 result.append(('', ''))
 223         return result
 224
 225     def getaddress(self):
 226         """Parse the next address."""
 227         self.commentlist = []
 228         self.gotonext()
 229
 230         oldpos = self.pos
 231         oldcl = self.commentlist
 232         plist = self.getphraselist()
 233
 234         self.gotonext()
 235         returnlist = []
 236
 237         if self.pos >= len(self.field):
 238             # Bad email address technically, no domain.
 239             if plist:
 240                 returnlist = [(SPACE.join(self.commentlist), plist[0])]
 241
 242         elif self.field[self.pos] in '.@':
 243             # email address is just an addrspec
 244             # this isn't very efficient since we start over
 245             self.pos = oldpos
 246             self.commentlist = oldcl
 247             addrspec = self.getaddrspec()
 248             returnlist = [(SPACE.join(self.commentlist), addrspec)]
 249
 250         elif self.field[self.pos] == ':':
 251             # address is a group
 252             returnlist = []
 253
 254             fieldlen = len(self.field)
 255             self.pos += 1
 256             while self.pos < len(self.field):
 257                 self.gotonext()
 258                 if self.pos < fieldlen and self.field[self.pos] == ';':
 259                     self.pos += 1
 260                     break
 261                 returnlist = returnlist + self.getaddress()
 262
 263         elif self.field[self.pos] == '<':
 264             # Address is a phrase then a route addr
 265             routeaddr = self.getrouteaddr()
 266
 267             if self.commentlist:
 268                 returnlist = [(SPACE.join(plist) + ' (' +
 269                                ' '.join(self.commentlist) + ')', routeaddr)]
 270             else:
 271                 returnlist = [(SPACE.join(plist), routeaddr)]
 272
 273         else:
 274             if plist:
 275                 returnlist = [(SPACE.join(self.commentlist), plist[0])]
 276             elif self.field[self.pos] in self.specials:
 277                 self.pos += 1
 278
 279         self.gotonext()
 280         if self.pos < len(self.field) and self.field[self.pos] == ',':
 281             self.pos += 1
 282         return returnlist
 283
 284     def getrouteaddr(self):
 285         """Parse a route address (Return-path value).
 286
 287         This method just skips all the route stuff and returns the addrspec.
 288         """
 289         if self.field[self.pos] != '<':
 290             return
 291
 292         expectroute = False
 293         self.pos += 1
 294         self.gotonext()
 295         adlist = ''
 296         while self.pos < len(self.field):
 297             if expectroute:
 298                 self.getdomain()
 299                 expectroute = False
 300             elif self.field[self.pos] == '>':
 301                 self.pos += 1
 302                 break
 303             elif self.field[self.pos] == '@':
 304                 self.pos += 1
 305                 expectroute = True
 306             elif self.field[self.pos] == ':':
 307                 self.pos += 1
 308             else:
 309                 adlist = self.getaddrspec()
 310                 self.pos += 1
 311                 break
 312             self.gotonext()
 313
 314         return adlist
 315
 316     def getaddrspec(self):
 317         """Parse an RFC 2822 addr-spec."""
 318         aslist = []
 319
 320         self.gotonext()
 321         while self.pos < len(self.field):
 322             if self.field[self.pos] == '.':
 323                 aslist.append('.')
 324                 self.pos += 1
 325             elif self.field[self.pos] == '"':
 326                 aslist.append('"%s"' % quote(self.getquote()))
 327             elif self.field[self.pos] in self.atomends:
 328                 break
 329             else:
 330                 aslist.append(self.getatom())
 331             self.gotonext()
 332
 333         if self.pos >= len(self.field) or self.field[self.pos] != '@':
 334             return EMPTYSTRING.join(aslist)
 335
 336         aslist.append('@')
 337         self.pos += 1
 338         self.gotonext()
 339         return EMPTYSTRING.join(aslist) + self.getdomain()
 340
 341     def getdomain(self):
 342         """Get the complete domain name from an address."""
 343         sdlist = []
 344         while self.pos < len(self.field):
 345             if self.field[self.pos] in self.LWS:
 346                 self.pos += 1
 347             elif self.field[self.pos] == '(':
 348                 self.commentlist.append(self.getcomment())
 349             elif self.field[self.pos] == '[':
 350                 sdlist.append(self.getdomainliteral())
 351             elif self.field[self.pos] == '.':
 352                 self.pos += 1
 353                 sdlist.append('.')
 354             elif self.field[self.pos] in self.atomends:
 355                 break
 356             else:
 357                 sdlist.append(self.getatom())
 358         return EMPTYSTRING.join(sdlist)
 359
 360     def getdelimited(self, beginchar, endchars, allowcomments=True):
 361         """Parse a header fragment delimited by special characters.
 362
 363         `beginchar' is the start character for the fragment.
 364         If self is not looking at an instance of `beginchar' then
 365         getdelimited returns the empty string.
 366
 367         `endchars' is a sequence of allowable end-delimiting characters.
 368         Parsing stops when one of these is encountered.
 369
 370         If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
 371         within the parsed fragment.
 372         """
 373         if self.field[self.pos] != beginchar:
 374             return ''
 375
 376         slist = ['']
 377         quote = False
 378         self.pos += 1
 379         while self.pos < len(self.field):
 380             if quote:
 381                 slist.append(self.field[self.pos])
 382                 quote = False
 383             elif self.field[self.pos] in endchars:
 384                 self.pos += 1
 385                 break
 386             elif allowcomments and self.field[self.pos] == '(':
 387                 slist.append(self.getcomment())
 388                 continue        # have already advanced pos from getcomment
 389             elif self.field[self.pos] == '\\':
 390                 quote = True
 391             else:
 392                 slist.append(self.field[self.pos])
 393             self.pos += 1
 394
 395         return EMPTYSTRING.join(slist)
 396
 397     def getquote(self):
 398         """Get a quote-delimited fragment from self's field."""
 399         return self.getdelimited('"', '"\r', False)
 400
 401     def getcomment(self):
 402         """Get a parenthesis-delimited fragment from self's field."""
 403         return self.getdelimited('(', ')\r', True)
 404
 405     def getdomainliteral(self):
 406         """Parse an RFC 2822 domain-literal."""
 407         return '[%s]' % self.getdelimited('[', ']\r', False)
 408
 409     def getatom(self, atomends=None):
 410         """Parse an RFC 2822 atom.
 411
 412         Optional atomends specifies a different set of end token delimiters
 413         (the default is to use self.atomends).  This is used e.g. in
 414         getphraselist() since phrase endings must not include the `.' (which
 415         is legal in phrases)."""
 416         atomlist = ['']
 417         if atomends is None:
 418             atomends = self.atomends
 419
 420         while self.pos < len(self.field):
 421             if self.field[self.pos] in atomends:
 422                 break
 423             else:
 424                 atomlist.append(self.field[self.pos])
 425             self.pos += 1
 426
 427         return EMPTYSTRING.join(atomlist)
 428
 429     def getphraselist(self):
 430         """Parse a sequence of RFC 2822 phrases.
 431
 432         A phrase is a sequence of words, which are in turn either RFC 2822
 433         atoms or quoted-strings.  Phrases are canonicalized by squeezing all
 434         runs of continuous whitespace into one space.
 435         """
 436         plist = []
 437
 438         while self.pos < len(self.field):
 439             if self.field[self.pos] in self.FWS:
 440                 self.pos += 1
 441             elif self.field[self.pos] == '"':
 442                 plist.append(self.getquote())
 443             elif self.field[self.pos] == '(':
 444                 self.commentlist.append(self.getcomment())
 445             elif self.field[self.pos] in self.phraseends:
 446                 break
 447             else:
 448                 plist.append(self.getatom(self.phraseends))
 449
 450         return plist
 451
 452 class AddressList(AddrlistClass):
 453     """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
 454     def __init__(self, field):
 455         AddrlistClass.__init__(self, field)
 456         if field:
 457             self.addresslist = self.getaddrlist()
 458         else:
 459             self.addresslist = []
 460
 461     def __len__(self):
 462         return len(self.addresslist)
 463
 464     def __add__(self, other):
 465         # Set union
 466         newaddr = AddressList(None)
 467         newaddr.addresslist = self.addresslist[:]
 468         for x in other.addresslist:
 469             if not x in self.addresslist:
 470                 newaddr.addresslist.append(x)
 471         return newaddr
 472
 473     def __iadd__(self, other):
 474         # Set union, in-place
 475         for x in other.addresslist:
 476             if not x in self.addresslist:
 477                 self.addresslist.append(x)
 478         return self
 479
 480     def __sub__(self, other):
 481         # Set difference
 482         newaddr = AddressList(None)
 483         for x in self.addresslist:
 484             if not x in other.addresslist:
 485                 newaddr.addresslist.append(x)
 486         return newaddr
 487
 488     def __isub__(self, other):
 489         # Set difference, in-place
 490         for x in other.addresslist:
 491             if x in self.addresslist:
 492                 self.addresslist.remove(x)
 493         return self
 494
 495     def __getitem__(self, index):
 496         # Make indexing, slices, and 'in' work
 497         return self.addresslist[index]