]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/rfc822.py
AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python...
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / rfc822.py
CommitLineData
"""RFC 2822 message manipulation.

Note: This is only a very rough sketch of a full RFC-822 parser; in particular
the tokenizing of addresses does not adhere to all the quoting rules.

Note: RFC 2822 is a long awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.

    RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
    RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)

Directions for use:

To create a Message object: first open a file, e.g.:

    fp = open(file, 'r')

You can use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen().  Then pass the open file object to the Message()
constructor:

    m = Message(fp)

This class can work with any input object that supports a readline method.  If
the input object has seek and tell capability, the rewindbody method will
work; also illegal lines will be pushed back onto the input stream.  If the
input object lacks seek but has an `unread' method that can push back a line
of input, Message will use that to push back illegal lines.  Thus this class
can be used to parse messages coming from a buffered stream.

The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.

To get the text of a particular header there are several methods:

    str = m.getheader(name)
    str = m.getrawheader(name)

where name is the name of the header, e.g. 'Subject'.  The difference is that
getheader() strips the leading and trailing whitespace, while getrawheader()
doesn't.  Both functions retain embedded whitespace (including newlines)
exactly as they are specified in the header, and leave the case of the text
unchanged.

For addresses and address lists there are functions

    realname, mailaddress = m.getaddr(name)
    list = m.getaddrlist(name)

where the latter returns a list of (realname, mailaddr) tuples.

There is also a method

    time = m.getdate(name)

which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
72# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>\r
73\r
74import time\r
75\r
76from warnings import warnpy3k\r
77warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",\r
78 stacklevel=2)\r
79\r
80__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]\r
81\r
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC 2822-compliant message.

    The constructor reads the header block from a file-like object and
    leaves the object positioned at the start of the body.  After
    construction the following attributes are available:

    fp             -- the input object passed to the constructor
    seekable       -- true if tell()/seek() are believed to work on fp
    startofheaders -- fp position before the headers were read (or None)
    startofbody    -- fp position after the headers were read (or None)
    headers        -- list of raw header lines, continuation lines included
    dict           -- lower-cased header name -> stripped value (last wins)
    unixfrom       -- any leading Unix "From " envelope line(s)
    status         -- '' if parsing went well, otherwise an error message
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        fp must support readline().  If seekable is exactly 1, tell() is
        probed once and seekable is reset to 0 when the probe fails; other
        true values skip the probe.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on every Python version, unlike the
            # Python-2-only ``raise IOError, "..."`` statement form.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit pushback method; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                # Remember where this line starts so it can be pushed back.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: the lower-cased text before the first colon), or None if the
        line is not a header.  You may override this method in order to use
        Message parsing on tagged data in RFC 2822-like formats with special
        header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of CR LF also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line only.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is absent or
        cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is absent or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice assignment keeps the same list object, as in-place deletion did.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it as `default' if absent."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
463\r
464\r
465# Utility functions\r
466# -----------------\r
467\r
468# XXX Should fix unquote() and quote() to be really conformant.\r
469# XXX The inverses of the parse functions may also be useful.\r
470\r
471\r
def unquote(s):
    """Strip one layer of enclosing "..." or <...> from a string.

    For a double-quoted string the escaped backslashes and escaped quotes
    inside are also unescaped.  Strings shorter than two characters, or
    without matching delimiters, are returned unchanged.
    """
    if len(s) <= 1:
        return s
    inner = s[1:-1]
    if s.startswith('"') and s.endswith('"'):
        without_double_backslash = inner.replace('\\\\', '\\')
        return without_double_backslash.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return inner
    return s
480\r
481\r
def quote(s):
    """Escape a string for use inside a quoted string.

    Backslashes are doubled and double quotes are preceded by a backslash.
    No surrounding quote characters are added.
    """
    backslashes_escaped = s.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
485\r
486\r
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
494\r
495\r
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser keeps a cursor (self.pos) into the header text (self.field)
    and collects parenthesized comments in self.commentlist as it goes.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.
        """
        result = []
        # Keep going until the whole field is consumed.  An empty entry
        # (e.g. two consecutive commas) yields no address but must not cut
        # the scan short and drop everything that follows it.
        while self.pos < len(self.field):
            result += self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing
        usable was found, and possibly several entries for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far, so restoring it
        # below really discards the ones gathered by the look-ahead parse.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it so parsing advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only, no domain.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
769\r
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    The +, +=, - and -= operators treat two lists as sets of addresses.
    """

    def __init__(self, field):
        """Parse `field'; a false value gives an empty address list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by ", "."""
        return ", ".join(dump_address_pair(pair) for pair in self.addresslist)

    def __add__(self, other):
        """Set union: a new list with other's addresses not already in self."""
        union = AddressList(None)
        extras = [pair for pair in other.addresslist
                  if pair not in self.addresslist]
        union.addresslist = self.addresslist + extras
        return union

    def __iadd__(self, other):
        """Set union, in-place."""
        mine = self.addresslist
        for pair in other.addresslist:
            if pair in mine:
                continue
            mine.append(pair)
        return self

    def __sub__(self, other):
        """Set difference: a new list with self's addresses not in other."""
        difference = AddressList(None)
        difference.addresslist = [pair for pair in self.addresslist
                                  if pair not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Set difference, in-place."""
        mine = self.addresslist
        for pair in other.addresslist:
            if pair not in mine:
                continue
            mine.remove(pair)
        return self

    def __getitem__(self, index):
        """Delegate to the address list so indexing, slices and 'in' work."""
        return self.addresslist[index]
819\r
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is the bare address.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return ''.join(['"', name, '" <', address, '>'])
826\r
# Parse a date field

_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items form a tuple that can be passed to time.mktime();
    items 6, 7 and 8 are always 0, 1 and 0.  The tenth item is the zone's
    offset from UTC in seconds, or None when no usable zone was found.
    Zone names are looked up in _timezones (which has no military zones
    other than Z); anything else is read as a numeric +HHMM/-HHMM offset.

    Returns None if the string is empty, blank, or cannot be parsed.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to index below.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe "Month day" order: swap and try again.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm = mm - 12
    # endswith()/slicing instead of [-1]/[0]: the tokens can be empty
    # (e.g. from "jan--2" or a lone ","), and must not raise IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
933\r
934\r
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is the result of parsedate_tz() without its trailing zone offset;
    None is returned when the string cannot be parsed.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
941\r
942\r
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    `data[9]' is the zone offset from UTC in seconds.  When it is None the
    fields are taken to be local time and converted with time.mktime().
    The result is a float number of seconds since the epoch.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # The fields are wall-clock time in a zone `data[9]' seconds ahead of
    # UTC.  Convert them as if they were UTC and subtract the offset; this
    # avoids a round trip through the process's local zone
    # (time.mktime() followed by a time.timezone correction).
    import calendar
    return float(calendar.timegm(data[:6])) - data[9]
951\r
def formatdate(timeval=None):
    """Return a timestamp formatted the way Internet standards prefer.

        Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is seconds since the epoch; the current time is used when it
    is None.  The result is always expressed in GMT.

    RFC 1123 requires English day and month names, so the names are taken
    from fixed tables here rather than from strftime(), which honors the
    locale and could generate non-English names.
    """
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = \
        time.gmtime(timeval)[:7]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second)
971\r
972\r
973# When used as script, run a small test program.\r
974# The first command line argument must be a filename containing one\r
975# message in RFC-822 format.\r
976\r
977if __name__ == '__main__':\r
978 import sys, os\r
979 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')\r
980 if sys.argv[1:]: file = sys.argv[1]\r
981 f = open(file, 'r')\r
982 m = Message(f)\r
983 print 'From:', m.getaddr('from')\r
984 print 'To:', m.getaddrlist('to')\r
985 print 'Subject:', m.getheader('subject')\r
986 print 'Date:', m.getheader('date')\r
987 date = m.getdate_tz('date')\r
988 tz = date[-1]\r
989 date = time.localtime(mktime_tz(date))\r
990 if date:\r
991 print 'ParsedDate:', time.asctime(date),\r
992 hhmmss = tz\r
993 hhmm, ss = divmod(hhmmss, 60)\r
994 hh, mm = divmod(hhmm, 60)\r
995 print "%+03d%02d" % (hh, mm),\r
996 if ss: print ".%02d" % ss,\r
997 print\r
998 else:\r
999 print 'ParsedDate:', None\r
1000 m.rewindbody()\r
1001 n = 0\r
1002 while f.readline():\r
1003 n += 1\r
1004 print 'Lines:', n\r
1005 print '-'*70\r
1006 print 'len =', len(m)\r
1007 if 'Date' in m: print 'Date =', m['Date']\r
1008 if 'X-Nonsense' in m: pass\r
1009 print 'keys =', m.keys()\r
1010 print 'values =', m.values()\r
1011 print 'items =', m.items()\r