[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / encodings / idna.py

# This module implements RFC 3490 (IDNA) and RFC 3491 (Nameprep).
\r
import codecs
import re
import stringprep
from unicodedata import ucd_3_2_0 as unicodedata
\r
# IDNA section 3.1: the characters recognized as label separators
# (U+002E full stop, U+3002 ideographic full stop, U+FF0E fullwidth
# full stop, U+FF61 halfwidth ideographic full stop).
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5: the ACE prefix, once as a byte string and once as a
# unicode string so that either kind of label can be tested against it.
ace_prefix = "xn--"
uace_prefix = unicode(ace_prefix, "ascii")
\r
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Return *label* prepared according to Nameprep (RFC 3491).

    The label goes through four stages:

    1. Map: characters in stringprep table B.1 are dropped, every other
       character is replaced by its table B.2 mapping.
    2. Normalize: the result is normalized to NFKC using the Unicode
       3.2.0 database.
    3. Prohibit: a character found in any of the tables C.1.2, C.2.2,
       C.3 - C.9 is rejected.
    4. Check bidi: if the label contains a RandALCat character (table
       D.1) it must not contain an LCat character (table D.2) and must
       both start and end with a RandALCat character.

    Raises UnicodeError when stage 3 or stage 4 rejects the label.
    """
    # Map
    label = u"".join([stringprep.map_table_b2(c)
                      for c in label
                      if not stringprep.in_table_b1(c)])

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(c) for c in label]
    # The requirements below concern the label as a whole, so they are
    # evaluated once if any RandAL character is present, not once per
    # RandAL character (which would make the check quadratic).
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(c) for c in label):
            raise UnicodeError("Violation of BIDI requirement 2")

        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label
\r
def ToASCII(label):
    """Convert a single label to its ASCII form (RFC 3490, ToASCII).

    A label that already encodes to ASCII is returned as that ASCII
    string.  Otherwise it is passed through nameprep(); if it is still
    not ASCII it is Punycode-encoded and prefixed with the ACE prefix.

    Raises UnicodeError if the prepared label already starts with the
    ACE prefix, or if the final result is empty or longer than 63
    characters.  Errors raised by nameprep() or the punycode codec
    propagate unchanged.
    """
    # Step 1: try ASCII
    try:
        encoded = label.encode("ascii")
    except UnicodeError:
        encoded = None
    # On success: skip to step 3 (UseSTD3ASCIIRules is false), so
    # skip to step 8.

    if encoded is None:
        # Step 2: nameprep
        label = nameprep(label)

        # Step 3: UseSTD3ASCIIRules is false
        # Step 4: try ASCII
        try:
            encoded = label.encode("ascii")
        except UnicodeError:
            encoded = None

    if encoded is None:
        # Step 5: Check ACE prefix
        if label.startswith(uace_prefix):
            raise UnicodeError("Label starts with ACE prefix")

        # Step 6: Encode with PUNYCODE
        # Step 7: Prepend ACE prefix
        encoded = ace_prefix + label.encode("punycode")

    # Step 8: Check size
    if 0 < len(encoded) < 64:
        return encoded
    raise UnicodeError("label empty or too long")
\r
def ToUnicode(label):
    """Convert a single label to its unicode form (RFC 3490, ToUnicode).

    A byte-string label is taken to be ASCII as is; a unicode label is
    encoded to ASCII, going through nameprep() first when necessary.
    A label without the ACE prefix is returned as a unicode string.
    A label with the ACE prefix is Punycode-decoded, and the decoded
    text is returned only if ToASCII() maps it back to the lower-cased
    input label.

    Raises UnicodeError if the label cannot be brought to ASCII or if
    the round trip through ToASCII() does not reproduce it.  Errors from
    nameprep(), the punycode codec and ToASCII() propagate unchanged.
    """
    # Step 1: Check for ASCII
    if isinstance(label, str):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
        except UnicodeError:
            pure_ascii = False
        else:
            pure_ascii = True

    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return unicode(label, "ascii")

    # Step 4: Remove ACE prefix
    stripped = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = stripped.decode("punycode")

    # Step 6: Apply ToASCII
    roundtrip = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # The ToASCII() output will already be in lower case.
    if label.lower() != roundtrip:
        raise UnicodeError("IDNA does not round-trip", label, roundtrip)

    # Step 8: return the result of step 5
    return result
\r
### Codec APIs\r
\r
class Codec(codecs.Codec):
    """Stateless IDNA codec working on whole domain names."""

    def encode(self, input, errors='strict'):
        """Encode a domain name label by label with ToASCII().

        The name is split on any of the IDNA dot characters, every label
        is converted with ToASCII() and the results are joined with
        U+002E.  A trailing separator in the input is kept as a trailing
        '.' in the output.

        Returns a tuple (encoded name, len(input)).
        Raises UnicodeError if *errors* is not 'strict' or if ToASCII()
        rejects a label.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return "", 0

        labels = dots.split(input)
        if labels and not labels[-1]:
            # The name ended with a separator; remember it and drop the
            # empty label so it is not handed to ToASCII().
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = [ToASCII(label) for label in labels]
        # Join with U+002E
        return ".".join(result)+trailing_dot, len(input)

    def decode(self, input, errors='strict'):
        """Decode a domain name label by label with ToUnicode().

        Unicode input is split on any of the IDNA dot characters; any
        other input is converted with str(), must be pure ASCII and is
        split on '.'.  A trailing separator in the input is kept as a
        trailing u'.' in the output.

        Returns a tuple (decoded name, len(input)).
        Raises UnicodeError if *errors* is not 'strict', if a byte
        string input is not ASCII, or if ToUnicode() rejects a label.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return u"", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, unicode):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input)
            # Validation only: raises UnicodeError for non-ASCII bytes,
            # the converted value itself is not needed.
            unicode(input, "ascii")
            labels = input.split(".")

        if labels and not labels[-1]:
            trailing_dot = u'.'
            del labels[-1]
        else:
            trailing_dot = u''

        result = [ToUnicode(label) for label in labels]

        return u".".join(result)+trailing_dot, len(input)
\r
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only completed labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode as many complete labels of *input* as possible.

        Unless *final* is true, the last label is treated as possibly
        unfinished and is left unconsumed so that the base class passes
        it in again together with the next chunk.

        Returns a tuple (encoded text, number of input characters
        consumed).
        Raises UnicodeError if *errors* is not 'strict' or if ToASCII()
        rejects a label.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return ("", 0)

        labels = dots.split(input)
        # A byte string, like every other piece of the encoded output;
        # a unicode default here would turn the joined result into a
        # unicode object whenever no trailing dot is emitted.
        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = [ToASCII(label) for label in labels]

        # Consumed input: every encoded label, the one-character
        # separators between them, and the trailing separator if any.
        size = sum(len(label) for label in labels)
        if labels:
            size += len(labels) - 1
        size += len(trailing_dot)

        # Join with U+002E
        result = ".".join(result) + trailing_dot
        return (result, size)
\r
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only completed labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode as many complete labels of *input* as possible.

        Unless *final* is true, the last label is treated as possibly
        unfinished and is left unconsumed so that the base class passes
        it in again together with the next chunk.

        Returns a tuple (decoded text, number of input characters
        consumed).
        Raises UnicodeError if *errors* is not 'strict', if a byte
        string input is not ASCII, or if ToUnicode() rejects a label.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, unicode):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input)
            # Validation only: raises UnicodeError for non-ASCII bytes.
            unicode(input, "ascii")
            labels = input.split(".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = [ToUnicode(label) for label in labels]

        # Consumed input: every decoded label, the one-character
        # separators between them, and the trailing separator if any.
        # The separators are counted from the number of labels rather
        # than from the running total, because ToUnicode() accepts empty
        # labels and a running total of zero would otherwise hide the
        # separator that follows an empty leading label.
        size = sum(len(label) for label in labels)
        if labels:
            size += len(labels) - 1
        size += len(trailing_dot)

        result = u".".join(result) + trailing_dot
        return (result, size)
\r
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer that encodes with Codec.encode."""
    pass
\r
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader that decodes with Codec.decode."""
    pass
\r
### encodings module API\r
\r
def getregentry():
    """Return the codecs.CodecInfo entry that describes the 'idna' codec."""
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
Commit	Line	Data
3257aa99 DM	1	# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)\r
	2	\r
	3	import stringprep, re, codecs\r
	4	from unicodedata import ucd_3_2_0 as unicodedata\r
	5	\r
	6	# IDNA section 3.1\r
	7	dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")\r
	8	\r
	9	# IDNA section 5\r
	10	ace_prefix = "xn--"\r
	11	uace_prefix = unicode(ace_prefix, "ascii")\r
	12	\r
	13	# This assumes query strings, so AllowUnassigned is true\r
	14	def nameprep(label):\r
	15	# Map\r
	16	newlabel = []\r
	17	for c in label:\r
	18	if stringprep.in_table_b1(c):\r
	19	# Map to nothing\r
	20	continue\r
	21	newlabel.append(stringprep.map_table_b2(c))\r
	22	label = u"".join(newlabel)\r
	23	\r
	24	# Normalize\r
	25	label = unicodedata.normalize("NFKC", label)\r
	26	\r
	27	# Prohibit\r
	28	for c in label:\r
	29	if stringprep.in_table_c12(c) or \\r
	30	stringprep.in_table_c22(c) or \\r
	31	stringprep.in_table_c3(c) or \\r
	32	stringprep.in_table_c4(c) or \\r
	33	stringprep.in_table_c5(c) or \\r
	34	stringprep.in_table_c6(c) or \\r
	35	stringprep.in_table_c7(c) or \\r
	36	stringprep.in_table_c8(c) or \\r
	37	stringprep.in_table_c9(c):\r
	38	raise UnicodeError("Invalid character %r" % c)\r
	39	\r
	40	# Check bidi\r
	41	RandAL = map(stringprep.in_table_d1, label)\r
	42	for c in RandAL:\r
	43	if c:\r
	44	# There is a RandAL char in the string. Must perform further\r
	45	# tests:\r
	46	# 1) The characters in section 5.8 MUST be prohibited.\r
	47	# This is table C.8, which was already checked\r
	48	# 2) If a string contains any RandALCat character, the string\r
	49	# MUST NOT contain any LCat character.\r
	50	if filter(stringprep.in_table_d2, label):\r
	51	raise UnicodeError("Violation of BIDI requirement 2")\r
	52	\r
	53	# 3) If a string contains any RandALCat character, a\r
	54	# RandALCat character MUST be the first character of the\r
	55	# string, and a RandALCat character MUST be the last\r
	56	# character of the string.\r
	57	if not RandAL[0] or not RandAL[-1]:\r
	58	raise UnicodeError("Violation of BIDI requirement 3")\r
	59	\r
	60	return label\r
	61	\r
	62	def ToASCII(label):\r
	63	try:\r
	64	# Step 1: try ASCII\r
65	label = label.encode("ascii")\r
66	except UnicodeError:\r
67	pass\r
68	else:\r
69	# Skip to step 3: UseSTD3ASCIIRules is false, so\r
70	# Skip to step 8.\r
71	if 0 < len(label) < 64:\r
72	return label\r
73	raise UnicodeError("label empty or too long")\r
74	\r
75	# Step 2: nameprep\r
76	label = nameprep(label)\r
77	\r
78	# Step 3: UseSTD3ASCIIRules is false\r
79	# Step 4: try ASCII\r
80	try:\r
81	label = label.encode("ascii")\r
82	except UnicodeError:\r
83	pass\r
84	else:\r
85	# Skip to step 8.\r
86	if 0 < len(label) < 64:\r
87	return label\r
88	raise UnicodeError("label empty or too long")\r
89	\r
90	# Step 5: Check ACE prefix\r
91	if label.startswith(uace_prefix):\r
92	raise UnicodeError("Label starts with ACE prefix")\r
93	\r
94	# Step 6: Encode with PUNYCODE\r
95	label = label.encode("punycode")\r
96	\r
97	# Step 7: Prepend ACE prefix\r
98	label = ace_prefix + label\r
99	\r
100	# Step 8: Check size\r
101	if 0 < len(label) < 64:\r
102	return label\r
103	raise UnicodeError("label empty or too long")\r
104	\r
105	def ToUnicode(label):\r
106	# Step 1: Check for ASCII\r
107	if isinstance(label, str):\r
108	pure_ascii = True\r
109	else:\r
110	try:\r
111	label = label.encode("ascii")\r
112	pure_ascii = True\r
113	except UnicodeError:\r
114	pure_ascii = False\r
115	if not pure_ascii:\r
116	# Step 2: Perform nameprep\r
117	label = nameprep(label)\r
118	# It doesn't say this, but apparently, it should be ASCII now\r
119	try:\r
120	label = label.encode("ascii")\r
121	except UnicodeError:\r
122	raise UnicodeError("Invalid character in IDN label")\r
123	# Step 3: Check for ACE prefix\r
124	if not label.startswith(ace_prefix):\r
125	return unicode(label, "ascii")\r
126	\r
127	# Step 4: Remove ACE prefix\r
128	label1 = label[len(ace_prefix):]\r
129	\r
130	# Step 5: Decode using PUNYCODE\r
131	result = label1.decode("punycode")\r
132	\r
133	# Step 6: Apply ToASCII\r
134	label2 = ToASCII(result)\r
135	\r
136	# Step 7: Compare the result of step 6 with the one of step 3\r
137	# label2 will already be in lower case.\r
138	if label.lower() != label2:\r
139	raise UnicodeError("IDNA does not round-trip", label, label2)\r
140	\r
141	# Step 8: return the result of step 5\r
142	return result\r
143	\r
144	### Codec APIs\r
145	\r
146	class Codec(codecs.Codec):\r
147	def encode(self,input,errors='strict'):\r
148	\r
149	if errors != 'strict':\r
150	# IDNA is quite clear that implementations must be strict\r
151	raise UnicodeError("unsupported error handling "+errors)\r
152	\r
153	if not input:\r
154	return "", 0\r
155	\r
156	result = []\r
157	labels = dots.split(input)\r
158	if labels and len(labels[-1])==0:\r
159	trailing_dot = '.'\r
160	del labels[-1]\r
161	else:\r
162	trailing_dot = ''\r
163	for label in labels:\r
164	result.append(ToASCII(label))\r
165	# Join with U+002E\r
166	return ".".join(result)+trailing_dot, len(input)\r
167	\r
168	def decode(self,input,errors='strict'):\r
169	\r
170	if errors != 'strict':\r
171	raise UnicodeError("Unsupported error handling "+errors)\r
172	\r
173	if not input:\r
174	return u"", 0\r
175	\r
176	# IDNA allows decoding to operate on Unicode strings, too.\r
177	if isinstance(input, unicode):\r
178	labels = dots.split(input)\r
179	else:\r
180	# Must be ASCII string\r
181	input = str(input)\r
182	unicode(input, "ascii")\r
183	labels = input.split(".")\r
184	\r
185	if labels and len(labels[-1]) == 0:\r
186	trailing_dot = u'.'\r
187	del labels[-1]\r
188	else:\r
189	trailing_dot = u''\r
190	\r
191	result = []\r
192	for label in labels:\r
193	result.append(ToUnicode(label))\r
194	\r
195	return u".".join(result)+trailing_dot, len(input)\r
196	\r
197	class IncrementalEncoder(codecs.BufferedIncrementalEncoder):\r
198	def _buffer_encode(self, input, errors, final):\r
199	if errors != 'strict':\r
200	# IDNA is quite clear that implementations must be strict\r
201	raise UnicodeError("unsupported error handling "+errors)\r
202	\r
203	if not input:\r
204	return ("", 0)\r
205	\r
206	labels = dots.split(input)\r
207	trailing_dot = u''\r
208	if labels:\r
209	if not labels[-1]:\r
210	trailing_dot = '.'\r
211	del labels[-1]\r
212	elif not final:\r
213	# Keep potentially unfinished label until the next call\r
214	del labels[-1]\r
215	if labels:\r
216	trailing_dot = '.'\r
217	\r
218	result = []\r
219	size = 0\r
220	for label in labels:\r
221	result.append(ToASCII(label))\r
222	if size:\r
223	size += 1\r
224	size += len(label)\r
225	\r
226	# Join with U+002E\r
227	result = ".".join(result) + trailing_dot\r
228	size += len(trailing_dot)\r
229	return (result, size)\r
230	\r
231	class IncrementalDecoder(codecs.BufferedIncrementalDecoder):\r
232	def _buffer_decode(self, input, errors, final):\r
233	if errors != 'strict':\r
234	raise UnicodeError("Unsupported error handling "+errors)\r
235	\r
236	if not input:\r
237	return (u"", 0)\r
238	\r
239	# IDNA allows decoding to operate on Unicode strings, too.\r
240	if isinstance(input, unicode):\r
241	labels = dots.split(input)\r
242	else:\r
243	# Must be ASCII string\r
244	input = str(input)\r
245	unicode(input, "ascii")\r
246	labels = input.split(".")\r
247	\r
248	trailing_dot = u''\r
249	if labels:\r
250	if not labels[-1]:\r
251	trailing_dot = u'.'\r
252	del labels[-1]\r
253	elif not final:\r
254	# Keep potentially unfinished label until the next call\r
255	del labels[-1]\r
256	if labels:\r
257	trailing_dot = u'.'\r
258	\r
259	result = []\r
260	size = 0\r
261	for label in labels:\r
262	result.append(ToUnicode(label))\r
263	if size:\r
264	size += 1\r
265	size += len(label)\r
266	\r
267	result = u".".join(result) + trailing_dot\r
268	size += len(trailing_dot)\r
269	return (result, size)\r
270	\r
271	class StreamWriter(Codec,codecs.StreamWriter):\r
272	pass\r
273	\r
274	class StreamReader(Codec,codecs.StreamReader):\r
275	pass\r
276	\r
277	### encodings module API\r
278	\r
279	def getregentry():\r
280	return codecs.CodecInfo(\r
281	name='idna',\r
282	encode=Codec().encode,\r
283	decode=Codec().decode,\r
284	incrementalencoder=IncrementalEncoder,\r
285	incrementaldecoder=IncrementalDecoder,\r
286	streamwriter=StreamWriter,\r
287	streamreader=StreamReader,\r
288	)\r