[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / email / quoprimime.py

# Copyright (C) 2001-2006 Python Software Foundation\r
# Author: Ben Gertzfield\r
# Contact: email-sig@python.org\r
\r
"""Quoted-printable content transfer encoding per RFCs 2045-2047.\r
\r
This module handles the content transfer encoding method defined in RFC 2045\r
to encode US ASCII-like 8-bit data called `quoted-printable'.  It is used to\r
safely encode text that is in a character set similar to the 7-bit US ASCII\r
character set, but that includes some 8-bit characters that are normally not\r
allowed in email bodies or headers.\r
\r
Quoted-printable is very space-inefficient for encoding binary files; use the\r
email.base64mime module for that instead.\r
\r
This module provides an interface to encode and decode both headers and bodies\r
with quoted-printable encoding.\r
\r
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:/From:/Cc: etc. fields, as well as Subject: lines.
\r
This module does not do the line wrapping or end-of-line character\r
conversion necessary for proper internationalized headers; it only\r
does dumb encoding and decoding.  To deal with the various line\r
wrapping issues, use the email.header module.\r
"""\r
\r
# Names exported by ``from email.quoprimime import *``.
__all__ = [
    'body_decode',
    'body_encode',
    'body_quopri_check',
    'body_quopri_len',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_decode',
    'header_encode',
    'header_quopri_check',
    'header_quopri_len',
    'quote',
    'unquote',
    ]
\r
import re\r
\r
from string import hexdigits\r
from email.utils import fix_eols\r
\r
# Line terminators: CRLF is used to recognise and strip line endings, NL is
# the default value of the `eol' arguments in this module.
CRLF = '\r\n'
NL = '\n'

# Number of characters of "chrome" that header_encode() wraps around every
# encoded chunk in addition to the charset name: '=?' + '?q?' + '?='.
# See also Charset.py
MISC_LEN = 7

# Matches a single character that must be written as =XX in a header.  Space
# is deliberately not matched; header_encode() writes it as an underscore.
hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
# Matches a single character that must be written as =XX in a body: anything
# other than space, tab, and the ASCII characters '!' through '~' except '='.
bqre = re.compile(r'[^ !-<>-~\t]')
\r
\r
\f\r
# Helpers\r
def header_quopri_check(c):
    """Return True if the character should be escaped with header quopri.

    Only the first character of c is examined; an empty string yields False.
    A space is reported as not needing an escape because header_encode()
    writes it as an underscore rather than as =20.
    """
    return hqre.match(c) is not None
\r
\r
def body_quopri_check(c):
    """Return True if the character should be escaped with body quopri.

    Only the first character of c is examined; an empty string yields False.
    Space and tab are reported as not needing an escape; encode() deals with
    them separately when they end a line.
    """
    return bqre.match(c) is not None
\r
\r
def header_quopri_len(s):
    """Return the length of s when it is encoded with header quopri.

    Every character that has to be escaped counts as three characters (=XX)
    and every other character, including space, counts as one.  The
    surrounding =?charset?q?...?= chrome is not included.
    """
    return sum(3 if hqre.match(c) else 1 for c in s)
\r
\r
def body_quopri_len(str):
    """Return the length of str when it is encoded with body quopri.

    Every character that has to be escaped counts as three characters (=XX)
    and every other character counts as one.  Soft line breaks and the
    special treatment of trailing whitespace are not taken into account.
    """
    # NOTE: the parameter name shadows the builtin `str'; it is kept because
    # it is part of the public signature.
    return sum(3 if bqre.match(c) else 1 for c in str)
\r
\r
def _max_append(L, s, maxlen, extra=''):\r
    if not L:\r
        L.append(s.lstrip())\r
    elif len(L[-1]) + len(s) <= maxlen:\r
        L[-1] += extra + s\r
    else:\r
        L.append(s.lstrip())\r
\r
\r
def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab.

    Only the two characters following the first one are interpreted; the
    leading character is not checked and anything after the third character
    is ignored.  Raises ValueError if those characters are not hexadecimal.
    """
    hex_pair = s[1:3]
    return chr(int(hex_pair, 16))
\r
\r
def quote(c):
    """Return the character c in the form =AB, with upper-case hex digits."""
    return "=%02X" % ord(c)
\r
\r
\f\r
def header_encode(header, charset="iso-8859-1", keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in non-RFC
    2047 aware mail clients.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    The resulting string will be in the form:

    "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?=\\n
      =?charset?q?Silly_=C8nglish_Kn=EEghts?="

    with each line wrapped safely at, at most, maxlinelen characters (defaults
    to 76 characters).  If maxlinelen is None, the entire string is encoded in
    one chunk with no splitting.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    The encoded chunks are joined with the value of eol followed by a single
    space; eol defaults to "\\n".  Set this to "\\r\\n" if you are using the
    result of this function directly in email.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Encode every character on its own first; wrapping is decided afterwards
    # so that an escape sequence is never split across two chunks.
    pieces = []
    for c in header:
        if c == ' ':
            # Space may be represented as _ instead of =20 for readability
            pieces.append('_')
        elif hqre.match(c):
            # Replace with hex value like =E2
            pieces.append(quote(c))
        else:
            # These characters can be included verbatim
            pieces.append(c)

    if maxlinelen is None:
        # No wrapping requested: really produce a single chunk, however long
        # the header is.
        quoted = [''.join(pieces)]
    else:
        # Room left for encoded text once the RFC chrome, the charset name
        # and the joiner's leading space are accounted for.
        max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
        quoted = []
        for piece in pieces:
            _max_append(quoted, piece, max_encoded)

    # Now add the RFC chrome to each encoded chunk and glue the chunks
    # together.  BAW: should we be able to specify the leading whitespace in
    # the joiner?
    joiner = eol + ' '
    return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])
\r
\r
\f\r
def encode(body, binary=False, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    If binary is False (the default), the body is first passed through
    email.utils.fix_eols() to normalise its end-of-line characters.  In
    either case the body is then split into lines, and every line ending
    found is written to the output as eol.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Long lines will have the `soft linefeed' quoted-printable
    character "=" appended to them, so the decoded text will be identical to
    the original text.

    Whitespace at the end of a line is protected: at the very end of the body
    it is escaped as =20 / =09, elsewhere it is followed by a soft linefeed.

    An empty body is returned unchanged.
    """
    if not body:
        return body

    if not binary:
        body = fix_eols(body)

    # Finished pieces of output; joined once at the end instead of growing a
    # string by repeated concatenation.
    out = []
    # Preserve line endings here so we can tell which lines need an eol in
    # the output.
    lines = body.splitlines(True)
    last_lineno = len(lines) - 1
    for lineno, raw_line in enumerate(lines):
        # Strip off the line ending for processing, remembering whether
        # there was one.
        if raw_line.endswith(CRLF):
            line = raw_line[:-2]
            terminated = True
        elif raw_line[-1] in CRLF:
            line = raw_line[:-1]
            terminated = True
        else:
            line = raw_line
            terminated = False

        encoded_line = ''
        # Set to the final character of the line when that character is a
        # space or tab; such a character is held back and handled below.
        trailing_ws = None
        linelen = len(line)
        for j, c in enumerate(line):
            if bqre.match(c):
                c = quote(c)
            elif j + 1 == linelen:
                # Last character of the line and it needs no escaping
                if c in ' \t':
                    trailing_ws = c
                else:
                    encoded_line += c
                continue
            # Insert a soft linefeed before the line would reach its maximum
            # length (one column is kept free for the "=" itself).
            if len(encoded_line) + len(c) >= maxlinelen:
                out.append(encoded_line + '=' + eol)
                encoded_line = ''
            encoded_line += c

        # Now at end of line..
        if trailing_ws is not None:
            if lineno == last_lineno:
                # Whitespace at end of file must be escaped
                escaped = quote(trailing_ws)
                if len(encoded_line) + len(escaped) > maxlinelen:
                    out.append(encoded_line + '=' + eol + escaped)
                else:
                    out.append(encoded_line + escaped)
            else:
                # Just normal whitespace at end of line: protect it with a
                # soft linefeed.  If the whitespace plus the "=" would push
                # the line past maxlinelen, break before the whitespace.
                if len(encoded_line) + 2 > maxlinelen:
                    out.append(encoded_line + '=' + eol)
                    encoded_line = ''
                out.append(encoded_line + trailing_ws + '=' + eol)
            encoded_line = ''

        # Emit whatever is left of this line, plus eol if the original line
        # had a line ending.
        out.append(encoded_line)
        if terminated:
            out.append(eol)
    return ''.join(out)
\r
\r
# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
\r
\r
\f\r
# BAW: I'm not sure if the intent was for the signature of this function to be\r
# the same as base64MIME.decode() or not...\r
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace on each encoded line is discarded.  An "=" at the
    end of a line is a soft line break: it is dropped and no eol is emitted
    for that line.  An "=" followed by two hexadecimal digits is replaced by
    the character with that value; any other "=" is passed through
    literally.

    The result ends with eol only if the encoded text itself ended with a
    line ending (\\r or \\n).  An empty string is returned unchanged.
    """
    if not encoded:
        return encoded

    # Decoded fragments; joined once at the end instead of growing a string
    # by repeated concatenation.
    pieces = []
    # True when the most recently processed line contributed a trailing eol
    # to `pieces' (i.e. it did not end in a soft line break).
    ends_with_eol = False

    for line in encoded.splitlines():
        line = line.rstrip()
        i = 0
        n = len(line)
        ends_with_eol = True
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # this is a soft line break: emit nothing, not even eol.
            elif i + 1 == n:
                ends_with_eol = False
                i += 1
            # Decode if in form =AB
            elif (i + 2 < n and line[i+1] in hexdigits
                  and line[i+2] in hexdigits):
                pieces.append(unquote(line[i:i+3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

        if ends_with_eol:
            pieces.append(eol)

    # Special case if the original string did not end with a line ending:
    # drop the eol we added for the last line.  Removing the list element
    # (rather than slicing characters off the result) works for an eol of any
    # length and never touches a newline that was decoded from the input.
    if ends_with_eol and encoded[-1] not in CRLF:
        pieces.pop()
    return ''.join(pieces)
\r
\r
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
\r
\r
\f\r
def _unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab.

    match is a regular expression match object whose whole matched text is
    handed to unquote(); this makes the function usable as the replacement
    callback of re.sub().
    """
    return unquote(match.group(0))
\r
\r
# Header decoding is done a bit differently\r
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces and every =XX sequence (two
    hexadecimal digits) is replaced by the character with that value.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    # Underscores must be replaced first: once the =XX sequences are decoded,
    # a literal underscore that was encoded as =5F could no longer be told
    # apart from one standing for a space.
    spaced = s.replace('_', ' ')
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, spaced)
Commit	Line	Data
4710c53d	1	# Copyright (C) 2001-2006 Python Software Foundation\r
	2	# Author: Ben Gertzfield\r
	3	# Contact: email-sig@python.org\r
	4	\r
	5	"""Quoted-printable content transfer encoding per RFCs 2045-2047.\r
	6	\r
	7	This module handles the content transfer encoding method defined in RFC 2045\r
	8	to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to\r
	9	safely encode text that is in a character set similar to the 7-bit US ASCII\r
	10	character set, but that includes some 8-bit characters that are normally not\r
	11	allowed in email bodies or headers.\r
	12	\r
	13	Quoted-printable is very space-inefficient for encoding binary files; use the\r
	14	email.base64mime module for that instead.\r
	15	\r
	16	This module provides an interface to encode and decode both headers and bodies\r
	17	with quoted-printable encoding.\r
	18	\r
	19	RFC 2045 defines a method for including character set information in an\r
	20	`encoded-word' in a header. This method is commonly used for 8-bit real names\r
	21	in To:/From:/Cc: etc. fields, as well as Subject: lines.\r
	22	\r
	23	This module does not do the line wrapping or end-of-line character\r
	24	conversion necessary for proper internationalized headers; it only\r
	25	does dumb encoding and decoding. To deal with the various line\r
	26	wrapping issues, use the email.header module.\r
	27	"""\r
	28	\r
	29	__all__ = [\r
	30	'body_decode',\r
	31	'body_encode',\r
	32	'body_quopri_check',\r
	33	'body_quopri_len',\r
	34	'decode',\r
	35	'decodestring',\r
	36	'encode',\r
	37	'encodestring',\r
	38	'header_decode',\r
	39	'header_encode',\r
	40	'header_quopri_check',\r
	41	'header_quopri_len',\r
	42	'quote',\r
	43	'unquote',\r
	44	]\r
	45	\r
	46	import re\r
	47	\r
	48	from string import hexdigits\r
	49	from email.utils import fix_eols\r
	50	\r
	51	CRLF = '\r\n'\r
	52	NL = '\n'\r
	53	\r
	54	# See also Charset.py\r
	55	MISC_LEN = 7\r
	56	\r
	57	hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')\r
	58	bqre = re.compile(r'[^ !-<>-~\t]')\r
	59	\r
	60	\r
	61	\f\r
	62	# Helpers\r
	63	def header_quopri_check(c):\r
	64	"""Return True if the character should be escaped with header quopri."""\r
65	return bool(hqre.match(c))\r
66	\r
67	\r
68	def body_quopri_check(c):\r
69	"""Return True if the character should be escaped with body quopri."""\r
70	return bool(bqre.match(c))\r
71	\r
72	\r
73	def header_quopri_len(s):\r
74	"""Return the length of str when it is encoded with header quopri."""\r
75	count = 0\r
76	for c in s:\r
77	if hqre.match(c):\r
78	count += 3\r
79	else:\r
80	count += 1\r
81	return count\r
82	\r
83	\r
84	def body_quopri_len(str):\r
85	"""Return the length of str when it is encoded with body quopri."""\r
86	count = 0\r
87	for c in str:\r
88	if bqre.match(c):\r
89	count += 3\r
90	else:\r
91	count += 1\r
92	return count\r
93	\r
94	\r
95	def _max_append(L, s, maxlen, extra=''):\r
96	if not L:\r
97	L.append(s.lstrip())\r
98	elif len(L[-1]) + len(s) <= maxlen:\r
99	L[-1] += extra + s\r
100	else:\r
101	L.append(s.lstrip())\r
102	\r
103	\r
104	def unquote(s):\r
105	"""Turn a string in the form =AB to the ASCII character with value 0xab"""\r
106	return chr(int(s[1:3], 16))\r
107	\r
108	\r
109	def quote(c):\r
110	return "=%02X" % ord(c)\r
111	\r
112	\r
113	\f\r
114	def header_encode(header, charset="iso-8859-1", keep_eols=False,\r
115	maxlinelen=76, eol=NL):\r
116	"""Encode a single header line with quoted-printable (like) encoding.\r
117	\r
118	Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but\r
119	used specifically for email header fields to allow charsets with mostly 7\r
120	bit characters (and some 8 bit) to remain more or less readable in non-RFC\r
121	2045 aware mail clients.\r
122	\r
123	charset names the character set to use to encode the header. It defaults\r
124	to iso-8859-1.\r
125	\r
126	The resulting string will be in the form:\r
127	\r
128	"=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n\r
129	=?charset?q?Silly_=C8nglish_Kn=EEghts?="\r
130	\r
131	with each line wrapped safely at, at most, maxlinelen characters (defaults\r
132	to 76 characters). If maxlinelen is None, the entire string is encoded in\r
133	one chunk with no splitting.\r
134	\r
135	End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted\r
136	to the canonical email line separator \\r\\n unless the keep_eols\r
137	parameter is True (the default is False).\r
138	\r
139	Each line of the header will be terminated in the value of eol, which\r
140	defaults to "\\n". Set this to "\\r\\n" if you are using the result of\r
141	this function directly in email.\r
142	"""\r
143	# Return empty headers unchanged\r
144	if not header:\r
145	return header\r
146	\r
147	if not keep_eols:\r
148	header = fix_eols(header)\r
149	\r
150	# Quopri encode each line, in encoded chunks no greater than maxlinelen in\r
151	# length, after the RFC chrome is added in.\r
152	quoted = []\r
153	if maxlinelen is None:\r
154	# An obnoxiously large number that's good enough\r
155	max_encoded = 100000\r
156	else:\r
157	max_encoded = maxlinelen - len(charset) - MISC_LEN - 1\r
158	\r
159	for c in header:\r
160	# Space may be represented as _ instead of =20 for readability\r
161	if c == ' ':\r
162	_max_append(quoted, '_', max_encoded)\r
163	# These characters can be included verbatim\r
164	elif not hqre.match(c):\r
165	_max_append(quoted, c, max_encoded)\r
166	# Otherwise, replace with hex value like =E2\r
167	else:\r
168	_max_append(quoted, "=%02X" % ord(c), max_encoded)\r
169	\r
170	# Now add the RFC chrome to each encoded chunk and glue the chunks\r
171	# together. BAW: should we be able to specify the leading whitespace in\r
172	# the joiner?\r
173	joiner = eol + ' '\r
174	return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])\r
175	\r
176	\r
177	\f\r
178	def encode(body, binary=False, maxlinelen=76, eol=NL):\r
179	"""Encode with quoted-printable, wrapping at maxlinelen characters.\r
180	\r
181	If binary is False (the default), end-of-line characters will be converted\r
182	to the canonical email end-of-line sequence \\r\\n. Otherwise they will\r
183	be left verbatim.\r
184	\r
185	Each line of encoded text will end with eol, which defaults to "\\n". Set\r
186	this to "\\r\\n" if you will be using the result of this function directly\r
187	in an email.\r
188	\r
189	Each line will be wrapped at, at most, maxlinelen characters (defaults to\r
190	76 characters). Long lines will have the `soft linefeed' quoted-printable\r
191	character "=" appended to them, so the decoded text will be identical to\r
192	the original text.\r
193	"""\r
194	if not body:\r
195	return body\r
196	\r
197	if not binary:\r
198	body = fix_eols(body)\r
199	\r
200	# BAW: We're accumulating the body text by string concatenation. That\r
201	# can't be very efficient, but I don't have time now to rewrite it. It\r
202	# just feels like this algorithm could be more efficient.\r
203	encoded_body = ''\r
204	lineno = -1\r
205	# Preserve line endings here so we can check later to see an eol needs to\r
206	# be added to the output later.\r
207	lines = body.splitlines(1)\r
208	for line in lines:\r
209	# But strip off line-endings for processing this line.\r
210	if line.endswith(CRLF):\r
211	line = line[:-2]\r
212	elif line[-1] in CRLF:\r
213	line = line[:-1]\r
214	\r
215	lineno += 1\r
216	encoded_line = ''\r
217	prev = None\r
218	linelen = len(line)\r
219	# Now we need to examine every character to see if it needs to be\r
220	# quopri encoded. BAW: again, string concatenation is inefficient.\r
221	for j in range(linelen):\r
222	c = line[j]\r
223	prev = c\r
224	if bqre.match(c):\r
225	c = quote(c)\r
226	elif j+1 == linelen:\r
227	# Check for whitespace at end of line; special case\r
228	if c not in ' \t':\r
229	encoded_line += c\r
230	prev = c\r
231	continue\r
232	# Check to see to see if the line has reached its maximum length\r
233	if len(encoded_line) + len(c) >= maxlinelen:\r
234	encoded_body += encoded_line + '=' + eol\r
235	encoded_line = ''\r
236	encoded_line += c\r
237	# Now at end of line..\r
238	if prev and prev in ' \t':\r
239	# Special case for whitespace at end of file\r
240	if lineno + 1 == len(lines):\r
241	prev = quote(prev)\r
242	if len(encoded_line) + len(prev) > maxlinelen:\r
243	encoded_body += encoded_line + '=' + eol + prev\r
244	else:\r
245	encoded_body += encoded_line + prev\r
246	# Just normal whitespace at end of line\r
247	else:\r
248	encoded_body += encoded_line + prev + '=' + eol\r
249	encoded_line = ''\r
250	# Now look at the line we just finished and it has a line ending, we\r
251	# need to add eol to the end of the line.\r
252	if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:\r
253	encoded_body += encoded_line + eol\r
254	else:\r
255	encoded_body += encoded_line\r
256	encoded_line = ''\r
257	return encoded_body\r
258	\r
259	\r
260	# For convenience and backwards compatibility w/ standard base64 module\r
261	body_encode = encode\r
262	encodestring = encode\r
263	\r
264	\r
265	\f\r
266	# BAW: I'm not sure if the intent was for the signature of this function to be\r
267	# the same as base64MIME.decode() or not...\r
268	def decode(encoded, eol=NL):\r
269	"""Decode a quoted-printable string.\r
270	\r
271	Lines are separated with eol, which defaults to \\n.\r
272	"""\r
273	if not encoded:\r
274	return encoded\r
275	# BAW: see comment in encode() above. Again, we're building up the\r
276	# decoded string with string concatenation, which could be done much more\r
277	# efficiently.\r
278	decoded = ''\r
279	\r
280	for line in encoded.splitlines():\r
281	line = line.rstrip()\r
282	if not line:\r
283	decoded += eol\r
284	continue\r
285	\r
286	i = 0\r
287	n = len(line)\r
288	while i < n:\r
289	c = line[i]\r
290	if c != '=':\r
291	decoded += c\r
292	i += 1\r
293	# Otherwise, c == "=". Are we at the end of the line? If so, add\r
294	# a soft line break.\r
295	elif i+1 == n:\r
296	i += 1\r
297	continue\r
298	# Decode if in form =AB\r
299	elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:\r
300	decoded += unquote(line[i:i+3])\r
301	i += 3\r
302	# Otherwise, not in form =AB, pass literally\r
303	else:\r
304	decoded += c\r
305	i += 1\r
306	\r
307	if i == n:\r
308	decoded += eol\r
309	# Special case if original string did not end with eol\r
310	if not encoded.endswith(eol) and decoded.endswith(eol):\r
311	decoded = decoded[:-1]\r
312	return decoded\r
313	\r
314	\r
315	# For convenience and backwards compatibility w/ standard base64 module\r
316	body_decode = decode\r
317	decodestring = decode\r
318	\r
319	\r
320	\f\r
321	def _unquote_match(match):\r
322	"""Turn a match in the form =AB to the ASCII character with value 0xab"""\r
323	s = match.group(0)\r
324	return unquote(s)\r
325	\r
326	\r
327	# Header decoding is done a bit differently\r
328	def header_decode(s):\r
329	"""Decode a string encoded with RFC 2045 MIME header `Q' encoding.\r
330	\r
331	This function does not parse a full MIME header value encoded with\r
332	quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use\r
333	the high level email.header class for that functionality.\r
334	"""\r
335	s = s.replace('_', ' ')\r
336	return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s)\r