]>
git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
1 """ Unicode Mapping Parser and Codec Generator.
3 This script parses Unicode mapping files as available from the Unicode
4 site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
5 modules from them. The codecs use the standard character mapping codec
6 to actually apply the mapping.
8 Synopsis: gencodec.py dir codec_prefix
10 All files in dir are scanned and those producing non-empty mappings
11 will be written to <codec_prefix><mapname>.py with <mapname> being the
12 first part of the map's filename ('a' in a.b.c.txt) converted to
13 lowercase with hyphens replaced by underscores.
15 The tool also writes marshalled versions of the mapping tables to the
16 same location (with .mapping extension).
18 Written by Marc-Andre Lemburg (mal@lemburg.com).
20 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
21 (c) Copyright Guido van Rossum, 2000.
24 (c) Copyright Marc-Andre Lemburg, 2005.
25 Licensed to PSF under a Contributor Agreement.
29 import re
, os
, marshal
, codecs
31 # Maximum allowed size of charmap tables
34 # Standard undefined Unicode code point
35 UNI_UNDEFINED
= unichr(0xFFFE)
37 mapRE
= re
.compile('((?:0x[0-9a-fA-F]+\+?)+)'
39 '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
43 def parsecodes(codes
, len=len, range=range):
45 """ Converts code combinations to either a single code integer
46 or a tuple of integers.
48 meta-codes (in angular brackets, e.g. <LR> and <RL>) are
51 Empty codes or illegal ones are returned as None.
59 for i
in range(len(l
)):
64 l
= [x
for x
in l
if x
is not None]
70 def readmap(filename
):
72 f
= open(filename
,'r')
79 # UTC mapping tables per convention don't include the identity
80 # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
81 # explicitly mapped to different characters or undefined
82 for i
in range(32) + [127]:
85 enc2uni
[i
] = (i
, 'CONTROL CHARACTER')
89 if not line
or line
[0] == '#':
93 #print '* not matched: %s' % repr(line)
95 enc
,uni
,comment
= m
.groups()
101 comment
= comment
[1:].strip()
107 enc2uni
[enc
] = (uni
,comment
)
109 enc2uni
[enc
] = (uni
,comment
)
111 # If there are more identity-mapped entries than unmapped entries,
112 # it pays to generate an identity dictionary first, and add explicit
113 # mappings to None for the rest
114 if len(identity
) >= len(unmapped
):
116 enc2uni
[enc
] = (None, "")
117 enc2uni
['IDENTITY'] = 256
121 def hexrepr(t
, precision
=4):
128 return '0x%0*X' % (precision
, t
)
130 return '(' + ', '.join(['0x%0*X' % (precision
, item
)
131 for item
in t
]) + ')'
132 except TypeError, why
:
133 print '* failed to convert %r: %s' % (t
, why
)
136 def python_mapdef_code(varname
, map, comments
=1, precisions
=(2, 4)):
140 if "IDENTITY" in map:
141 append("%s = codecs.make_identity_dict(range(%d))" %
142 (varname
, map["IDENTITY"]))
143 append("%s.update({" % varname
)
148 append("%s = {" % varname
)
152 mappings
= sorted(map.items())
154 key_precision
, value_precision
= precisions
155 for mapkey
, mapvalue
in mappings
:
157 if isinstance(mapkey
, tuple):
158 (mapkey
, mapcomment
) = mapkey
159 if isinstance(mapvalue
, tuple):
160 (mapvalue
, mapcomment
) = mapvalue
164 mapkey
== mapvalue
and
166 # No need to include identity mappings, since these
167 # are already set for the first 256 code points.
169 key
= hexrepr(mapkey
, key_precision
)
170 value
= hexrepr(mapvalue
, value_precision
)
171 if mapcomment
and comments
:
172 append(' %s: %s,\t# %s' % (key
, value
, mapcomment
))
174 append(' %s: %s,' % (key
, value
))
177 # Split the definition into parts so that the Python
178 # parser doesn't dump core
183 append('%s.update({' % varname
)
193 def python_tabledef_code(varname
, map, comments
=1, key_precision
=2):
197 append('%s = (' % varname
)
199 # Analyze map and create table dict
200 mappings
= sorted(map.items())
203 if 'IDENTITY' in map:
204 for key
in range(256):
205 table
[key
] = (key
, '')
208 for mapkey
, mapvalue
in mappings
:
210 if isinstance(mapkey
, tuple):
211 (mapkey
, mapcomment
) = mapkey
212 if isinstance(mapvalue
, tuple):
213 (mapvalue
, mapcomment
) = mapvalue
216 table
[mapkey
] = (mapvalue
, mapcomment
)
219 if maxkey
> MAX_TABLE_SIZE
:
224 for key
in range(maxkey
+ 1):
227 mapcomment
= 'UNDEFINED'
229 mapvalue
, mapcomment
= table
[key
]
231 mapchar
= UNI_UNDEFINED
233 if isinstance(mapvalue
, tuple):
234 # 1-n mappings not supported
237 mapchar
= unichr(mapvalue
)
238 if mapcomment
and comments
:
239 append(' %r\t# %s -> %s' % (mapchar
,
240 hexrepr(key
, key_precision
),
243 append(' %r' % mapchar
)
248 def codegen(name
, map, encodingname
, comments
=1):
250 """ Returns Python source for the given map.
252 Comments are included in the source, if comments is true (default).
256 decoding_map_code
= python_mapdef_code(
260 decoding_table_code
= python_tabledef_code(
264 encoding_map_code
= python_mapdef_code(
266 codecs
.make_encoding_map(map),
270 if decoding_table_code
:
277 """ Python Character Mapping Codec %s generated from '%s' with gencodec.py.
285 class Codec(codecs.Codec):
287 def encode(self,input,errors='strict'):
288 return codecs.charmap_encode(input,errors,encoding_%s)
290 def decode(self,input,errors='strict'):
291 return codecs.charmap_decode(input,errors,decoding_%s)
292 ''' % (encodingname
, name
, suffix
, suffix
)]
294 class IncrementalEncoder(codecs.IncrementalEncoder):
295 def encode(self, input, final=False):
296 return codecs.charmap_encode(input,self.errors,encoding_%s)[0]
298 class IncrementalDecoder(codecs.IncrementalDecoder):
299 def decode(self, input, final=False):
300 return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
304 class StreamWriter(Codec,codecs.StreamWriter):
307 class StreamReader(Codec,codecs.StreamReader):
310 ### encodings module API
313 return codecs.CodecInfo(
315 encode=Codec().encode,
316 decode=Codec().decode,
317 incrementalencoder=IncrementalEncoder,
318 incrementaldecoder=IncrementalDecoder,
319 streamreader=StreamReader,
320 streamwriter=StreamWriter,
322 ''' % encodingname
.replace('_', '-'))
324 # Add decoding table or map (with preference to the table)
325 if not decoding_table_code
:
329 l
.extend(decoding_map_code
)
334 l
.extend(decoding_table_code
)
337 if decoding_table_code
:
340 encoding_table=codecs.charmap_build(decoding_table)
346 l
.extend(encoding_map_code
)
351 return '\n'.join(l
).expandtabs()
353 def pymap(name
,map,pyfile
,encodingname
,comments
=1):
355 code
= codegen(name
,map,encodingname
,comments
)
360 def marshalmap(name
,map,marshalfile
):
363 for e
,(u
,c
) in map.items():
365 f
= open(marshalfile
,'wb')
369 def convertdir(dir, dirprefix
='', nameprefix
='', comments
=1):
371 mapnames
= os
.listdir(dir)
372 for mapname
in mapnames
:
373 mappathname
= os
.path
.join(dir, mapname
)
374 if not os
.path
.isfile(mappathname
):
376 name
= os
.path
.split(mapname
)[1]
377 name
= name
.replace('-','_')
378 name
= name
.split('.')[0]
380 name
= nameprefix
+ name
381 codefile
= name
+ '.py'
382 marshalfile
= name
+ '.mapping'
383 print 'converting %s to %s and %s' % (mapname
,
384 dirprefix
+ codefile
,
385 dirprefix
+ marshalfile
)
387 map = readmap(os
.path
.join(dir,mapname
))
389 print '* map is empty; skipping'
391 pymap(mappathname
, map, dirprefix
+ codefile
,name
,comments
)
392 marshalmap(mappathname
, map, dirprefix
+ marshalfile
)
393 except ValueError, why
:
394 print '* conversion failed: %s' % why
397 def rewritepythondir(dir, dirprefix
='', comments
=1):
399 mapnames
= os
.listdir(dir)
400 for mapname
in mapnames
:
401 if not mapname
.endswith('.mapping'):
403 name
= mapname
[:-len('.mapping')]
404 codefile
= name
+ '.py'
405 print 'converting %s to %s' % (mapname
,
406 dirprefix
+ codefile
)
408 map = marshal
.load(open(os
.path
.join(dir,mapname
),
411 print '* map is empty; skipping'
413 pymap(mapname
, map, dirprefix
+ codefile
,name
,comments
)
414 except ValueError, why
:
415 print '* conversion failed: %s' % why
417 if __name__
== '__main__':
421 convertdir(*sys
.argv
[1:])
423 rewritepythondir(*sys
.argv
[1:])