]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Tools / unicode / gencodec.py
1 """ Unicode Mapping Parser and Codec Generator.
2
3 This script parses Unicode mapping files as available from the Unicode
4 site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
5 modules from them. The codecs use the standard character mapping codec
6 to actually apply the mapping.
7
8 Synopsis: gencodec.py dir codec_prefix
9
10 All files in dir are scanned and those producing non-empty mappings
11 will be written to <codec_prefix><mapname>.py with <mapname> being the
12 first part of the map's filename ('a' in a.b.c.txt) converted to
13 lowercase with hyphens replaced by underscores.
14
15 The tool also writes marshalled versions of the mapping tables to the
16 same location (with .mapping extension).
17
18 Written by Marc-Andre Lemburg (mal@lemburg.com).
19
20 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
21 (c) Copyright Guido van Rossum, 2000.
22
23 Table generation:
24 (c) Copyright Marc-Andre Lemburg, 2005.
25 Licensed to PSF under a Contributor Agreement.
26
27 """#"
28
29 import re, os, marshal, codecs
30
# Maximum allowed size of charmap tables
MAX_TABLE_SIZE = 8192

# Standard undefined Unicode code point (U+FFFE).  Written as a literal so
# the constant does not depend on the Python-2-only unichr() builtin.
UNI_UNDEFINED = u'\uFFFE'

# Matches one line of a mapping file:
#   group 1: one or more '+'-joined hex codes (the encoded byte(s))
#   group 2: zero or more '+'-joined hex codes or <META> codes
#   group 3: optional trailing '#' comment
# Raw strings are used so that '\s' and '\+' reach the regex engine
# unchanged instead of being (invalid) string escape sequences.
# NOTE(review): the second character class reads 'a-fA-Z', which looks like
# a typo for 'a-fA-F'.  It is kept as is, because tightening it would turn a
# ValueError from parsecodes() into a silently "undefined" mapping.
mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)'
                   r'\s+'
                   r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
                   r'\s*'
                   r'(#.+)?')
42
def parsecodes(codes, len=len, range=range):

    """ Convert a '+'-separated code string into an integer or a tuple
        of integers.

        An empty (or otherwise false) argument yields None.

        A string holding exactly one code is passed straight to
        int(code, 16); if that code is not valid hex the ValueError
        propagates to the caller.

        When several codes are joined with '+', every part that is not
        valid hex (e.g. meta-codes such as <LR> and <RL>, or the empty
        part after a trailing '+') is dropped.  If exactly one code
        survives it is returned as an integer, otherwise the survivors
        are returned as a tuple (which may be empty).

    """
    if not codes:
        return None

    parts = codes.split('+')
    if len(parts) == 1:
        # Single code: no filtering, conversion errors are not caught.
        return int(parts[0], 16)

    values = []
    for part in parts:
        try:
            values.append(int(part, 16))
        except ValueError:
            # Not a hex number: ignore this part.
            continue

    if len(values) == 1:
        return values[0]
    return tuple(values)
69
def readmap(filename):

    """ Read a Unicode mapping file and return its contents as a dict.

        The result maps each encoded code (an integer, or a tuple of
        integers for '+'-joined codes) to a (unicode_code, comment)
        pair, as produced by mapRE and parsecodes().

        Codes 0x00-0x1F and 0x7F are pre-seeded as identity mappings
        with the comment 'CONTROL CHARACTER'; lines in the file may
        override them.

        If at least as many single-byte codes are identity-mapped as
        are left unmapped, every unmapped code below 256 is mapped to
        (None, "") and the marker entry 'IDENTITY': 256 is added.

        Lines that are blank, start with '#', or do not match mapRE
        are skipped.

    """
    # The with-block closes the file even if reading fails.
    with open(filename, 'r') as f:
        lines = f.readlines()

    enc2uni = {}
    identity = []
    # Must be a real list: entries are removed as codes get mapped.
    unmapped = list(range(256))

    # UTC mapping tables per convention don't include the identity
    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
    # explicitly mapped to different characters or undefined
    for i in list(range(32)) + [127]:
        identity.append(i)
        unmapped.remove(i)
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            continue
        enc, uni, comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if comment is None:
            comment = ''
        else:
            # Drop the leading '#'.
            comment = comment[1:].strip()
        # Only single integer codes below 256 take part in the
        # identity/unmapped bookkeeping.  Tuple keys are excluded
        # explicitly instead of relying on int-vs-tuple ordering.
        if not isinstance(enc, tuple) and enc < 256:
            if enc in unmapped:
                unmapped.remove(enc)
            if enc == uni:
                identity.append(enc)
        enc2uni[enc] = (uni, comment)

    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity) >= len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
120
def hexrepr(t, precision=4):

    """ Return a source-code representation of t using hex literals.

        None is rendered as 'None'.  A value without a length (an
        integer) is rendered as '0x' followed by at least `precision`
        upper-case hex digits.  Anything with a length is treated as a
        sequence and rendered as a parenthesised, comma-separated list
        of such literals.

        If an item of the sequence cannot be formatted, a diagnostic is
        printed and the TypeError is re-raised.

    """
    if t is None:
        return 'None'
    try:
        len(t)
    except TypeError:
        # No len(): a single code.  Only TypeError is caught so that
        # unrelated exceptions (e.g. KeyboardInterrupt) are not hidden.
        return '0x%0*X' % (precision, t)
    try:
        return '(' + ', '.join(['0x%0*X' % (precision, item)
                                for item in t]) + ')'
    except TypeError as why:
        print('* failed to convert %r: %s' % (t, why))
        raise
135
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):

    """ Return a list of source lines defining the dict `varname`.

        map        -- mapping to render.  A tuple key is unpacked as
                      (key, comment) and a tuple value as
                      (value, comment); entries whose key is None are
                      skipped.
        comments   -- if true, entry comments are written as trailing
                      '#' comments.
        precisions -- (key, value) minimum hex digit counts handed to
                      hexrepr().

        If map holds an 'IDENTITY' entry, the definition starts from
        codecs.make_identity_dict(range(<that value>)) and identity
        entries below 256 are left out.  The 'IDENTITY' entry is
        REMOVED from the caller's map as a side effect.

        After every 4096 entries the literal is closed and continued
        in a new <varname>.update({...}) call.

    """
    lines = []
    has_identity = "IDENTITY" in map
    if has_identity:
        lines.append("%s = codecs.make_identity_dict(range(%d))" %
                     (varname, map["IDENTITY"]))
        lines.append("%s.update({" % varname)
        del map["IDENTITY"]
        # We are inside an update() call, so the literal closes with '})'.
        closer = '})'
    else:
        lines.append("%s = {" % varname)
        closer = '}'

    entries = sorted(map.items())
    key_width, value_width = precisions
    emitted = 0
    for code, target in entries:
        note = ''
        if isinstance(code, tuple):
            code, note = code
        if isinstance(target, tuple):
            target, note = target
        if code is None:
            continue
        if has_identity and code == target and code < 256:
            # Identity mappings for the first 256 code points are
            # already provided by make_identity_dict().
            continue
        key_text = hexrepr(code, key_width)
        value_text = hexrepr(target, value_width)
        if note and comments:
            lines.append(' %s: %s,\t# %s' % (key_text, value_text, note))
        else:
            lines.append(' %s: %s,' % (key_text, value_text))
        emitted += 1
        if emitted == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
            lines.append(closer)
            lines.append('%s.update({' % varname)
            closer = '})'
            emitted = 0
    lines.append(closer)

    return lines
192
def python_tabledef_code(varname, map, comments=1, key_precision=2):

    """ Return a list of source lines defining the tuple `varname`,
        or None if the map cannot be expressed as a table.

        map           -- mapping to render.  A tuple key is unpacked as
                         (key, comment) and a tuple value as
                         (value, comment); entries whose key is None
                         are skipped.
        comments      -- if true, entry comments are written as
                         trailing '#' comments.
        key_precision -- minimum hex digit count for the key shown in
                         the comment (passed to hexrepr()).

        The table has one entry per key from 0 up to the largest key.
        Keys without a mapping, and keys mapped to None, get
        UNI_UNDEFINED.

        None is returned when the largest key exceeds MAX_TABLE_SIZE
        or when a value is a tuple after unpacking (1-n mapping).

        If map holds an 'IDENTITY' entry, keys 0-255 are pre-filled
        with identity mappings and the entry is REMOVED from the
        caller's map as a side effect.

    """
    l = []
    append = l.append
    append('%s = (' % varname)

    # Analyze map and create table dict
    table = {}
    maxkey = 0
    if 'IDENTITY' in map:
        for key in range(256):
            table[key] = (key, '')
        maxkey = 255
        del map['IDENTITY']
    # Take the snapshot only after the 'IDENTITY' marker is gone.
    # Otherwise the marker would be treated as a mapping, become
    # maxkey, and make the size check below reject every such map.
    mappings = sorted(map.items())
    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey is None:
            continue
        table[mapkey] = (mapvalue, mapcomment)
        if mapkey > maxkey:
            maxkey = mapkey
    if maxkey > MAX_TABLE_SIZE:
        # Table too large
        return None

    # Create table code
    for key in range(maxkey + 1):
        if key not in table:
            mapvalue = None
            mapcomment = 'UNDEFINED'
        else:
            mapvalue, mapcomment = table[key]
        if mapvalue is None:
            mapchar = UNI_UNDEFINED
        elif isinstance(mapvalue, tuple):
            # 1-n mappings not supported
            return None
        else:
            # unichr() is the Python 2 builtin for a code point.
            mapchar = unichr(mapvalue)
        if mapcomment and comments:
            append(' %r\t# %s -> %s' % (mapchar,
                                        hexrepr(key, key_precision),
                                        mapcomment))
        else:
            append(' %r' % mapchar)

    append(')')
    return l
247
def codegen(name, map, encodingname, comments=1):

    """ Returns Python source for the given map.

        name         -- source name quoted in the generated module
                        docstring ("generated from '<name>'").
        map          -- mapping as returned by readmap().  Note that
                        python_mapdef_code() removes an 'IDENTITY'
                        entry from it, so the caller's dict is changed.
        encodingname -- codec name used in the generated docstring and,
                        with '_' replaced by '-', as the CodecInfo
                        name.

        Comments are included in the source, if comments is true (default).

        A decoding table is emitted when python_tabledef_code() can
        build one; otherwise decoding and encoding maps are emitted.

        The class and function bodies in the templates below must keep
        their indentation, otherwise the generated module is not valid
        Python.

    """
    # Generate code.  The call order matters: the first call strips the
    # 'IDENTITY' marker from map before the other two see it.
    decoding_map_code = python_mapdef_code(
        'decoding_map',
        map,
        comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table',
        map,
        comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map',
        codecs.make_encoding_map(map),
        comments=comments,
        precisions=(4, 2))

    if decoding_table_code:
        suffix = 'table'
    else:
        suffix = 'map'

    l = [
        '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):
        return codecs.charmap_encode(input,errors,encoding_%s)

    def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
    l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
        (suffix, suffix))

    l.append('''
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
''' % encodingname.replace('_', '-'))

    # Add decoding table or map (with preference to the table)
    if not decoding_table_code:
        l.append('''
### Decoding Map
''')
        l.extend(decoding_map_code)
    else:
        l.append('''
### Decoding Table
''')
        l.extend(decoding_table_code)

    # Add encoding map
    if decoding_table_code:
        l.append('''
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)
''')
    else:
        l.append('''
### Encoding Map
''')
        l.extend(encoding_map_code)

    # Final new-line
    l.append('')

    return '\n'.join(l).expandtabs()
352
def pymap(name,map,pyfile,encodingname,comments=1):

    """ Generate codec source for map with codegen() and write it to
        the file pyfile, replacing any existing content.

        name, map, encodingname and comments are passed through to
        codegen() unchanged.

    """
    code = codegen(name,map,encodingname,comments)
    # The with-block closes the file even if the write fails.
    with open(pyfile,'w') as f:
        f.write(code)
359
def marshalmap(name,map,marshalfile):

    """ Write the mapping to marshalfile in marshal format.

        Every value of map must unpack into exactly two items; the
        entries are copied into a fresh dict of key -> (u, c) tuples
        before being dumped.  The name argument is not used.

    """
    d = {}
    for e,(u,c) in map.items():
        d[e] = (u,c)
    # The with-block closes the file even if marshal.dump() fails.
    with open(marshalfile,'wb') as f:
        marshal.dump(d,f)
368
def convertdir(dir, dirprefix='', nameprefix='', comments=1):

    """ Convert every regular file in dir into a codec module and a
        marshalled mapping file.

        The output base name is nameprefix plus the part of the file
        name before the first '.', with '-' replaced by '_' and
        converted to lowercase.  The outputs are written to
        dirprefix + <base> + '.py' and dirprefix + <base> + '.mapping'.

        A ValueError raised during a conversion is reported and then
        re-raised, which ends the run.

    """
    for mapname in os.listdir(dir):
        mappathname = os.path.join(dir, mapname)
        if not os.path.isfile(mappathname):
            continue

        stem = os.path.split(mapname)[1]
        stem = stem.replace('-', '_').split('.')[0].lower()
        name = nameprefix + stem
        codepath = dirprefix + name + '.py'
        marshalpath = dirprefix + name + '.mapping'
        print('converting %s to %s and %s' % (mapname,
                                              codepath,
                                              marshalpath))
        try:
            mapping = readmap(os.path.join(dir, mapname))
            if mapping:
                pymap(mappathname, mapping, codepath, name, comments)
                marshalmap(mappathname, mapping, marshalpath)
            else:
                print('* map is empty; skipping')
        except ValueError as why:
            print('* conversion failed: %s' % why)
            raise
396
def rewritepythondir(dir, dirprefix='', comments=1):

    """ Regenerate codec modules from the '.mapping' files in dir.

        Each file whose name ends in '.mapping' is loaded with
        marshal and written out through pymap() to
        dirprefix + <name without '.mapping'> + '.py'.

        A ValueError raised during a conversion is reported and the
        loop continues with the next file.

    """
    suffix = '.mapping'
    for mapname in os.listdir(dir):
        if not mapname.endswith(suffix):
            continue
        name = mapname[:-len(suffix)]
        codefile = name + '.py'
        print('converting %s to %s' % (mapname,
                                       dirprefix + codefile))
        try:
            # NOTE: marshal.load() must only be used on trusted files.
            # The with-block closes the file once it has been read.
            with open(os.path.join(dir,mapname), 'rb') as f:
                map = marshal.load(f)
            if not map:
                print('* map is empty; skipping')
            else:
                pymap(mapname, map, dirprefix + codefile,name,comments)
        except ValueError as why:
            print('* conversion failed: %s' % why)
416
if __name__ == '__main__':

    import sys

    # Hand-edited switch: as written, the command line arguments are
    # always passed to convertdir().  Change the constant to run
    # rewritepythondir() on them instead.
    if not 1:
        rewritepythondir(*sys.argv[1:])
    else:
        convertdir(*sys.argv[1:])