]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # Copyright (C) 2001-2006 Python Software Foundation\r |
2 | # Author: Barry Warsaw\r | |
3 | # Contact: email-sig@python.org\r | |
4 | \r | |
5 | """Basic message object for the email package object model."""\r | |
6 | \r | |
7 | __all__ = ['Message']\r | |
8 | \r | |
9 | import re\r | |
10 | import uu\r | |
11 | import binascii\r | |
12 | import warnings\r | |
13 | from cStringIO import StringIO\r | |
14 | \r | |
15 | # Intrapackage imports\r | |
16 | import email.charset\r | |
17 | from email import utils\r | |
18 | from email import errors\r | |
19 | \r | |
# Separator written between a header's main value and each of its parameters.
SEMISPACE = '; '

# Matches a space or any of the `special' punctuation characters.  When one
# of these occurs in a parameter value, the value has to be emitted quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
25 | \r | |
26 | \r | |
27 | # Helper functions\r | |
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple (head, rest).  Both parts have surrounding whitespace
    stripped; rest is None when the value contains no semicolon at all.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.  We may
    eventually need a full-fledged parser.
    """
    head, semicolon, rest = param.partition(';')
    head = head.strip()
    if semicolon:
        return head, rest.strip()
    return head, None
37 | \f\r | |
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    param is the parameter name.  When value is None or empty, the bare
    parameter name is returned.

    If value is a three tuple (charset, language, value), it is encoded
    according to RFC 2231 rules and returned as `param*=encoded'.  Such
    extended values are never wrapped in double quotes, because the RFC 2231
    grammar does not permit a quoted-string there.

    Otherwise the value is quoted if quote is true or if it contains any
    character matched by tspecials; if neither holds it is emitted verbatim.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.
    if isinstance(value, tuple):
        # Encode as per RFC 2231.  The encoded form is returned as-is:
        # quoting it would turn it into an ordinary quoted-string.
        param += '*'
        value = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param, value)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
61 | \r | |
def _parseparam(s):
    """Split a `;'-introduced parameter string into a list of items.

    s is consumed as long as it starts with a semicolon; each item runs up
    to the next semicolon that is not inside a double-quoted string (quote
    characters preceded by a backslash are not counted when deciding that).

    For an item containing `=', the text before the first `=' is stripped
    and lower-cased and the text after it is stripped.  Items without `='
    are only stripped.  Returns the list of resulting strings.
    """
    items = []
    while s.startswith(';'):
        s = s[1:]
        cut = s.find(';')
        # An odd number of unescaped quotes before the semicolon means it
        # sits inside a quoted string, so look for the next candidate.
        while cut > 0 and (s.count('"', 0, cut) - s.count('\\"', 0, cut)) % 2:
            cut = s.find(';', cut + 1)
        if cut < 0:
            cut = len(s)
        field, s = s[:cut], s[cut:]
        if '=' in field:
            key, _, val = field.partition('=')
            field = key.strip().lower() + '=' + val.strip()
        items.append(field.strip())
    return items
78 | \r | |
79 | \r | |
def _unquotevalue(value):
    """Return value with quoting removed via utils.unquote().

    A plain string is unquoted directly.  For a tuple (as produced for RFC
    2231 parameters) only the third element is unquoted and a 3-tuple of
    the first two elements plus the unquoted third is returned.
    """
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)
89 | \r | |
90 | \r | |
91 | \f\r | |
class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Header names are always compared case insensitively, but the case they
    were stored with is preserved.
    """
    def __init__(self):
        # Ordered list of (name, value) tuples; duplicates are allowed.
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and envelope header.
        """
        return self.as_string(unixfrom=True)

    def as_string(self, unixfrom=False):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend because by default it mangles lines that begin with
        "From ".  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        # Imported here rather than at module level.
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Store the envelope (Unix From_) header line."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the envelope (Unix From_) header line, or None if unset."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload; a TypeError is raised if i is
        given and the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable', `base64' or one
        of the uuencode spellings.  If some other encoding is used, or the
        header is missing, or if the payload has bogus data (i.e. bogus base64
        or uuencoded data), the payload is returned as-is.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        else:
            payload = self._payload[i]
        if not decode:
            return payload
        if self.is_multipart():
            return None
        cte = self.get('content-transfer-encoding', '').lower()
        if cte == 'quoted-printable':
            return utils._qdecode(payload)
        if cte == 'base64':
            try:
                return utils._bdecode(payload)
            except binascii.Error:
                # Incorrect padding
                return payload
        if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            sfp = StringIO()
            try:
                uu.decode(StringIO(payload+'\n'), sfp, quiet=True)
            except uu.Error:
                # Some decoding problem
                return payload
            return sfp.getvalue()
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
        self._payload = payload
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if isinstance(charset, basestring):
            charset = email.charset.Charset(charset)
        if not isinstance(charset, email.charset.Charset):
            raise TypeError(charset)
        self._charset = charset
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        if isinstance(self._payload, unicode):
            self._payload = self._payload.encode(charset.output_charset)
        if str(charset) != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # cte is first tried as a callable applied to this message.  If
            # calling it raises TypeError it is treated as the name of the
            # encoding: the payload is body-encoded and the header is added
            # here.
            try:
                cte(self)
            except TypeError:
                self._payload = charset.body_encode(self._payload)
                self.add_header('Content-Transfer-Encoding', cte)

    def get_charset(self):
        """Return the Charset instance associated with the message's payload.
        """
        return self._charset

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Return the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, name):
        """Return true if a header with the given name is present."""
        name = name.lower()
        return any(k.lower() == name for k, v in self._headers)

    def has_key(self, name):
        """Return true if the message contains the header."""
        missing = object()
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.  The returned list is a copy.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.  The first stored header with a matching name wins.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        name is the header field to add.  keyword arguments can be used to set
        additional parameters for the header field, with underscores converted
        to dashes.  Normally the parameter will be added as key="value" unless
        value is None, in which case only the key will be added.  If a
        parameter value contains non-ASCII characters it must be specified as a
        three-tuple of (charset, language, value), in which case it will be
        encoded according to RFC2231 rules.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        """
        parts = []
        for k, v in _params.items():
            key = k.replace('_', '-')
            if v is None:
                parts.append(key)
            else:
                parts.append(_formatparam(key, v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in enumerate(self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
            raise KeyError(_name)

    #
    # Content type handling
    #

    def get_content_type(self):
        """Return the message's content type.

        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.

        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
        """
        missing = object()
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
        ctype = _splitparam(value)[0].lower()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') != 1:
            return 'text/plain'
        return ctype

    def get_content_maintype(self):
        """Return the message's main content type.

        This is the `maintype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
        """Returns the message's sub-content type.

        This is the `subtype' part of the string returned by
        get_content_type().
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]

    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header.
        """
        self._default_type = ctype

    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
        missing = object()
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
        for p in _parseparam(';' + value):
            try:
                name, val = p.split('=', 1)
                name = name.strip()
                val = val.strip()
            except ValueError:
                # Must have been a bare attribute
                name = p.strip()
                val = ''
            params.append((name, val))
        params = utils.decode_params(params)
        return params

    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.

        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.

        Optional failobj is the object to return if there is no Content-Type
        header.  Optional header is the header to search instead of
        Content-Type.  If unquote is True, the value is unquoted.
        """
        missing = object()
        params = self._get_params_preserve(missing, header)
        if params is missing:
            return failobj
        if unquote:
            return [(k, _unquotevalue(v)) for k, v in params]
        else:
            return params

    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.

        Optional failobj is the object to return if there is no Content-Type
        header, or the Content-Type header has no such parameter.  Optional
        header is the header to search instead of Content-Type.

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a Unicode string like so:

            param = msg.get_param('foo')
            if isinstance(param, tuple):
                param = unicode(param[2], param[0] or 'us-ascii')

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
        to False.
        """
        if header not in self:
            return failobj
        wanted = param.lower()
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == wanted:
                if unquote:
                    return _unquotevalue(v)
                else:
                    return v
        return failobj

    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        if not self.get_param(param, header=header):
            # Parameter not present (or present with an empty value): tack
            # the new key/value pair onto the end of the existing header.
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header from its parameters, substituting the new
            # value where the names match.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

    def del_param(self, param, header='content-type', requote=True):
        """Remove the given parameter completely from the Content-Type header.

        The header will be re-written in place without the parameter or its
        value.  All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
        """
        if header not in self:
            return
        new_ctype = ''
        for p, v in self.get_params(header=header, unquote=requote):
            if p.lower() != param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype != self.get(header):
            del self[header]
            self[header] = new_ctype

    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError(
                'type must be of the form "maintype/subtype", got %r'
                % (type,))
        # Set the Content-Type, you get a MIME-Version
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if header not in self:
            self[header] = type
            return
        params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition header's
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter of the Content-Type header.  failobj is returned
        when neither is found.
        """
        missing = object()
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            filename = self.get_param('name', missing, 'content-type')
        if filename is missing:
            return failobj
        return utils.collapse_rfc2231_value(filename).strip()

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type header's `boundary'
        parameter, and it is unquoted.  failobj is returned when there is no
        such parameter.
        """
        missing = object()
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(boundary).rstrip()

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type to 'boundary'.

        This is subtly different than deleting the Content-Type header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type header in the original message.

        HeaderParseError is raised if the message has no Content-Type header.
        """
        missing = object()
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
        quoted = '"%s"' % boundary
        newparams = []
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', quoted))
                foundp = True
            else:
                newparams.append((pk, pv))
        if not foundp:
            # The original Content-Type header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append(('boundary', quoted))
        # Format the new header value once; it is the same for every
        # Content-Type header that gets replaced below.
        parts = []
        for pk, pv in newparams:
            if pv == '':
                parts.append(pk)
            else:
                parts.append('%s=%s' % (pk, pv))
        new_value = SEMISPACE.join(parts)
        # Replace the existing Content-Type header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                newheaders.append((h, new_value))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.  failobj is also returned when the charset name
        contains characters outside the us-ascii range.
        """
        missing = object()
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
        if isinstance(charset, tuple):
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
            pcharset = charset[0] or 'us-ascii'
            try:
                # LookupError will be raised if the charset isn't known to
                # Python.  UnicodeError will be raised if the encoded text
                # contains a character not in the charset.
                charset = unicode(charset[2], pcharset).encode('us-ascii')
            except (LookupError, UnicodeError):
                charset = charset[2]
        # charset character must be in us-ascii range
        try:
            if isinstance(charset, str):
                charset = unicode(charset, 'us-ascii')
            charset = charset.encode('us-ascii')
        except UnicodeError:
            return failobj
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part does not have a
        main MIME type of "text", or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_content_charset(failobj) for part in self.walk()]

    # I.e. def walk(self): ...
    from email.iterators import walk