]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python...
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Tools / unicode / gencodec.py
CommitLineData
4710c53d 1""" Unicode Mapping Parser and Codec Generator.\r
2\r
3This script parses Unicode mapping files as available from the Unicode\r
4site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec\r
5modules from them. The codecs use the standard character mapping codec\r
6to actually apply the mapping.\r
7\r
8Synopsis: gencodec.py dir codec_prefix\r
9\r
10All files in dir are scanned and those producing non-empty mappings\r
11will be written to <codec_prefix><mapname>.py with <mapname> being the\r
12first part of the map's filename ('a' in a.b.c.txt) converted to\r
13lowercase with hyphens replaced by underscores.\r
14\r
15The tool also writes marshalled versions of the mapping tables to the\r
16same location (with .mapping extension).\r
17\r
18Written by Marc-Andre Lemburg (mal@lemburg.com).\r
19\r
20(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.\r
21(c) Copyright Guido van Rossum, 2000.\r
22\r
23Table generation:\r
24(c) Copyright Marc-Andre Lemburg, 2005.\r
25 Licensed to PSF under a Contributor Agreement.\r
26\r
27"""#"\r
28\r
29import re, os, marshal, codecs\r
30\r
# Largest code for which a charmap decoding table is still generated
MAX_TABLE_SIZE = 8192

# Code point used to mark undefined entries in decoding tables
UNI_UNDEFINED = u'\uFFFE'

# Matches one line of a mapping file:
#   group 1 - encoded code(s), hex numbers joined by '+'
#   group 2 - optional Unicode code(s) or <meta> codes, joined by '+'
#   group 3 - optional trailing comment, including the leading '#'
# NOTE(review): the second group accepts A-Z (not just A-F) after '0x';
# such codes match here and are rejected later by int(..., 16).
# Confirm this is intended before narrowing the character class.
mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)'
                   r'\s+'
                   r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
                   r'\s*'
                   r'(#.+)?')
42\r
def parsecodes(codes, len=len, range=range):

    """ Converts code combinations to either a single code integer
        or a tuple of integers.

        codes is a string of base-16 codes joined by '+', e.g. '0x80'
        or '0x0041+0x0301'.

        Meta-codes (in angular brackets, e.g. <LR> and <RL>) and any
        other part that is not a valid base-16 integer are ignored.

        Returns None if codes is empty or no valid code remains, an
        integer if exactly one valid code remains and a tuple of
        integers otherwise.

        len and range default to the builtins and are kept in the
        signature for backwards compatibility; range is not used.

    """
    if not codes:
        return None
    values = []
    for part in codes.split('+'):
        try:
            values.append(int(part, 16))
        except ValueError:
            # Meta-code or empty part (e.g. after a trailing '+'):
            # skipped on purpose, see docstring
            continue
    if not values:
        # Only meta-codes / illegal codes were given
        return None
    if len(values) == 1:
        return values[0]
    return tuple(values)
69\r
def readmap(filename):

    """ Parses the mapping file filename and returns the decoding map.

        The result is a dictionary mapping each encoded code (an
        integer, or a tuple of integers for code combinations) to a
        tuple (unicode, comment), where unicode is the value returned
        by parsecodes() for the Unicode column and comment is the
        text following '#' on the line (or '').

        Codes 0x00 - 0x1F and 0x7F are pre-set to identity mappings
        with the comment 'CONTROL CHARACTER'; lines in the file
        override them.  Empty lines, comment lines and lines not
        matching mapRE are skipped.

        If at least as many codes below 256 are identity-mapped as
        are left unmapped, the unmapped ones are set to (None, "")
        and the extra key 'IDENTITY' (value 256) is added.

    """
    # The with-statement closes the file even if reading fails
    with open(filename, 'r') as f:
        lines = f.readlines()

    enc2uni = {}
    identity = []
    unmapped = set(range(256))

    # UTC mapping tables per convention don't include the identity
    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
    # explicitly mapped to different characters or undefined
    for i in list(range(32)) + [127]:
        identity.append(i)
        unmapped.discard(i)
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            continue
        enc, uni, comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if comment is None:
            comment = ''
        else:
            # Drop the leading '#'
            comment = comment[1:].strip()
        # Code combinations (tuples) never take part in the single
        # byte bookkeeping; test the type explicitly instead of
        # relying on the ordering of tuples and integers
        if not isinstance(enc, tuple) and enc < 256:
            unmapped.discard(enc)
            if enc == uni:
                identity.append(enc)
        enc2uni[enc] = (uni, comment)

    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity) >= len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
120\r
def hexrepr(t, precision=4):

    """ Returns the source representation of the code t as string.

        None is returned as 'None', a single integer as zero-padded
        upper-case hex literal with at least precision digits (e.g.
        '0x0041') and a sequence of integers as parenthesized,
        comma-separated list of such literals.

        Raises TypeError if an item of a sequence cannot be formatted
        as hex number; a diagnostic line is printed before the error
        is re-raised.

    """
    if t is None:
        return 'None'
    try:
        len(t)
    except TypeError:
        # No length: t is a single code, not a sequence
        return '0x%0*X' % (precision, t)
    try:
        return '(' + ', '.join(['0x%0*X' % (precision, item)
                                for item in t]) + ')'
    except TypeError as why:
        print('* failed to convert %r: %s' % (t, why))
        raise
135\r
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):

    """ Returns a list of source lines defining the dictionary
        varname with the contents of map.

        Keys and values of map may be plain codes or tuples whose
        second item is a comment; comments are written as trailing
        '#' comments if comments is true.  precisions gives the
        minimum number of hex digits for keys and values.

        If map has an 'IDENTITY' key, the definition starts from
        codecs.make_identity_dict() and identity mappings of codes
        below 256 are left out.  Note that the 'IDENTITY' key is
        deleted from the map passed in.

    """
    lines = []
    emit = lines.append
    identity = "IDENTITY" in map
    if identity:
        emit("%s = codecs.make_identity_dict(range(%d))" %
             (varname, map["IDENTITY"]))
        emit("%s.update({" % varname)
        # The caller's dictionary is modified here
        del map["IDENTITY"]
        splits = 1
    else:
        emit("%s = {" % varname)
        splits = 0

    key_precision, value_precision = precisions
    count = 0
    for mapkey, mapvalue in sorted(map.items()):
        mapcomment = ''
        if isinstance(mapkey, tuple):
            mapkey, mapcomment = mapkey
        if isinstance(mapvalue, tuple):
            mapvalue, mapcomment = mapvalue
        if mapkey is None:
            continue
        if identity and mapkey == mapvalue and mapkey < 256:
            # Already provided by the identity dictionary
            continue
        key = hexrepr(mapkey, key_precision)
        value = hexrepr(mapvalue, value_precision)
        if mapcomment and comments:
            emit(' %s: %s,\t# %s' % (key, value, mapcomment))
        else:
            emit(' %s: %s,' % (key, value))
        count += 1
        if count == 4096:
            # Close the current literal and continue in a new
            # update() call, so that no single dictionary literal
            # grows beyond 4096 entries
            emit('})' if splits else '}')
            emit('%s.update({' % varname)
            count = 0
            splits += 1
    emit('})' if splits else '}')

    return lines
192\r
def python_tabledef_code(varname, map, comments=1, key_precision=2):

    """ Returns a list of source lines defining the decoding table
        varname for map, or None if no table can be generated.

        The table holds one unicode literal per line for every code
        from 0 up to the largest key of map; codes without mapping
        are set to UNI_UNDEFINED.  If comments is true, each line is
        followed by a comment giving the code (with at least
        key_precision hex digits) and the mapping's comment.

        None is returned if the largest key exceeds MAX_TABLE_SIZE
        or if a code maps to more than one code point.

        If map has an 'IDENTITY' key, codes 0 - 255 start out as
        identity mappings.  Note that the 'IDENTITY' key is deleted
        from the map passed in.

    """
    l = []
    append = l.append
    append('%s = (' % varname)

    # Analyze map and create table dict
    table = {}
    maxkey = 0
    if 'IDENTITY' in map:
        for key in range(256):
            table[key] = (key, '')
        maxkey = 255
        del map['IDENTITY']
    # The items are only fetched after the 'IDENTITY' marker has been
    # removed; otherwise the marker would be handled like a mapping
    # and end up as maxkey
    for mapkey, mapvalue in sorted(map.items()):
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey is None:
            continue
        table[mapkey] = (mapvalue, mapcomment)
        if mapkey > maxkey:
            maxkey = mapkey
    if maxkey > MAX_TABLE_SIZE:
        # Table too large
        return None

    # Create table code
    for key in range(maxkey + 1):
        if key not in table:
            mapvalue = None
            mapcomment = 'UNDEFINED'
        else:
            mapvalue, mapcomment = table[key]
        if mapvalue is None:
            mapchar = UNI_UNDEFINED
        else:
            if isinstance(mapvalue, tuple):
                # 1-n mappings not supported
                return None
            else:
                mapchar = unichr(mapvalue)
        if mapcomment and comments:
            append(' %r\t# %s -> %s' % (mapchar,
                                        hexrepr(key, key_precision),
                                        mapcomment))
        else:
            append(' %r' % mapchar)

    append(')')
    return l
247\r
def codegen(name, map, encodingname, comments=1):

    """ Returns Python source for the given map.

        name is written into the module docstring as the origin of
        the mapping; encodingname is used in the docstring and, with
        underscores replaced by hyphens, as codec name in
        getregentry().

        The module uses a decoding table (plus an encoding table
        built with codecs.charmap_build()) if
        python_tabledef_code() can create one, and falls back to
        decoding/encoding dictionaries otherwise.

        Comments are included in the source, if comments is true
        (default).  Tabs in the result are expanded.

        Note that map is modified: python_mapdef_code() removes its
        'IDENTITY' key.

    """
    # Generate code
    decoding_map_code = python_mapdef_code(
        'decoding_map',
        map,
        comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table',
        map,
        comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map',
        codecs.make_encoding_map(map),
        comments=comments,
        precisions=(4, 2))

    if decoding_table_code:
        suffix = 'table'
    else:
        suffix = 'map'

    # The templates below become the generated module verbatim, so
    # the indentation inside them has to be valid Python nesting
    l = [
        '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):
        return codecs.charmap_encode(input,errors,encoding_%s)

    def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
    l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
        (suffix, suffix))

    l.append('''
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
''' % encodingname.replace('_', '-'))

    # Add decoding table or map (with preference to the table)
    if not decoding_table_code:
        l.append('''
### Decoding Map
''')
        l.extend(decoding_map_code)
    else:
        l.append('''
### Decoding Table
''')
        l.extend(decoding_table_code)

    # Add encoding map
    if decoding_table_code:
        l.append('''
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)
''')
    else:
        l.append('''
### Encoding Map
''')
        l.extend(encoding_map_code)

    # Final new-line
    l.append('')

    return '\n'.join(l).expandtabs()
352\r
def pymap(name,map,pyfile,encodingname,comments=1):

    """ Writes the codec module source for map to the file pyfile.

        name, map, encodingname and comments are passed on to
        codegen(), which creates the source.

    """
    code = codegen(name,map,encodingname,comments)
    # The with-statement closes the file even if writing fails
    with open(pyfile,'w') as f:
        f.write(code)
359\r
def marshalmap(name,map,marshalfile):

    """ Writes a marshalled copy of map to the file marshalfile.

        Every value of map has to be a 2-tuple (code, comment); a
        map that still has the 'IDENTITY' marker key therefore
        cannot be written.  name is not used.

    """
    d = {}
    for e,(u,c) in map.items():
        d[e] = (u,c)
    # The with-statement closes the file even if marshalling fails
    with open(marshalfile,'wb') as f:
        marshal.dump(d,f)
368\r
def convertdir(dir, dirprefix='', nameprefix='', comments=1):

    """ Converts all mapping files found in dir.

        For every regular file in dir a codec module (.py) and a
        marshalled mapping (.mapping) are written.  The output name
        is nameprefix plus the part of the file name before the
        first '.', lower-cased and with hyphens replaced by
        underscores.  dirprefix is prepended to the output file
        names as plain string.

        A ValueError raised during the conversion is reported and
        then re-raised.

    """
    for mapname in os.listdir(dir):
        mappathname = os.path.join(dir, mapname)
        if not os.path.isfile(mappathname):
            continue
        basename = os.path.split(mapname)[1]
        name = nameprefix + basename.replace('-','_').split('.')[0].lower()
        codefile = name + '.py'
        marshalfile = name + '.mapping'
        print('converting %s to %s and %s' % (mapname,
                                              dirprefix + codefile,
                                              dirprefix + marshalfile))
        try:
            map = readmap(mappathname)
            if map:
                pymap(mappathname, map, dirprefix + codefile,name,comments)
                marshalmap(mappathname, map, dirprefix + marshalfile)
            else:
                print('* map is empty; skipping')
        except ValueError as why:
            print('* conversion failed: %s' % why)
            raise
396\r
def rewritepythondir(dir, dirprefix='', comments=1):

    """ Recreates the codec modules from the .mapping files in dir.

        Every file in dir ending in '.mapping' is loaded with
        marshal and written as codec module to dirprefix plus the
        file name with '.mapping' replaced by '.py'.  Other files
        are ignored.

        A ValueError raised during the conversion is reported; the
        function then continues with the next file.

    """
    mapnames = os.listdir(dir)
    for mapname in mapnames:
        if not mapname.endswith('.mapping'):
            continue
        name = mapname[:-len('.mapping')]
        codefile = name + '.py'
        print('converting %s to %s' % (mapname,
                                       dirprefix + codefile))
        try:
            # NOTE: marshal data is not safe against malformed or
            # malicious input; only load files written by this tool.
            # The with-statement closes the file after loading.
            with open(os.path.join(dir,mapname), 'rb') as f:
                map = marshal.load(f)
            if not map:
                print('* map is empty; skipping')
            else:
                pymap(mapname, map, dirprefix + codefile,name,comments)
        except ValueError as why:
            print('* conversion failed: %s' % why)
416\r
if __name__ == '__main__':

    import sys
    # The constant condition is a manual switch: as written, the
    # command line arguments are always passed to convertdir(); the
    # rewritepythondir() branch only runs after editing the condition.
    if 1:
        convertdir(*sys.argv[1:])
    else:
        rewritepythondir(*sys.argv[1:])
422 else:\r
423 rewritepythondir(*sys.argv[1:])\r