]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # Copyright (C) 2001-2010 Python Software Foundation\r |
2 | # Contact: email-sig@python.org\r | |
3 | \r | |
4 | """Classes to generate plain text from a message object tree."""\r | |
5 | \r | |
# Names exported by ``from <this module> import *``.
__all__ = ['Generator', 'DecodedGenerator']
7 | \r | |
8 | import re\r | |
9 | import sys\r | |
10 | import time\r | |
11 | import random\r | |
12 | import warnings\r | |
13 | \r | |
14 | from cStringIO import StringIO\r | |
15 | from email.header import Header\r | |
16 | \r | |
# Separator used to build '_handle_<maintype>_<subtype>' method names.
UNDERSCORE = '_'
# Line separator used when joining pieces of generated text.
NL = '\n'

# Matches 'From ' at the start of any line; used for From_ mangling.
fcre = re.compile(r'^From ', re.MULTILINE)
21 | \r | |
22 | def _is8bitstring(s):\r | |
23 | if isinstance(s, str):\r | |
24 | try:\r | |
25 | unicode(s, 'us-ascii')\r | |
26 | except UnicodeError:\r | |
27 | return True\r | |
28 | return False\r | |
29 | \r | |
30 | \r | |
31 | \f\r | |
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No stored From_ line: synthesize one stamped with "now".
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that matches msg's content type."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # The trailing comma suppresses the newline; the print statement
            # that follows emits the separating space (Python 2 soft-space
            # behaviour) before the header value.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, escaping From_ lines when requested.

        A payload of None writes nothing.  Raises TypeError when the payload
        is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue.

        Each subpart is flattened with a clone of this generator.  If msg has
        no boundary, one that does not occur in the flattened subparts is
        created and stored on msg via set_boundary().  When mangle_from_ is
        true, From_ lines in the preamble and epilogue are escaped in the
        same way as in text bodies.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                # The preamble lands in the flattened output like any other
                # body text, so it needs the same From_ escaping.
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            if self._mangle_from_:
                # Same reasoning as for the preamble above.
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write a message/* payload (a nested message or a raw string)."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
284 | \r | |
285 | \r | |
286 | \f\r | |
# Default format string DecodedGenerator substitutes for non-text parts.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
288 | \r | |
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options.

        The inherited clone() only forwards mangle_from_ and maxheaderlen,
        which would silently reset a custom fmt to the default; forward the
        format string as well so the clone really has the same options.
        """
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen,
                              self._fmt)

    def _dispatch(self, msg):
        """Write the decoded text parts of msg and a placeholder for others."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # 'print >> self' goes through self.write(), i.e. to the
                # current output file object.
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
342 | \r | |
343 | \r | |
344 | \f\r | |
# Helper
# _width is the length of repr(sys.maxint-1); _fmt is the zero-padded
# decimal format of that width used for the random part of a boundary.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width
348 | \r | |
def _make_boundary(text=None):
    """Return a randomly generated MIME boundary string.

    When text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line ('--boundary' or '--boundary--'); numeric
    suffixes ('.0', '.1', ...) are appended to the random base until no such
    line is found.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # Keep trying suffixed variants while the candidate collides with a
    # delimiter-looking line somewhere in text.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = base + '.' + str(suffix)
        suffix += 1
    return candidate